From cf3c455059f03a9883224e6accc380530884afa5 Mon Sep 17 00:00:00 2001
From: marta <marta.valtuenna@estudiante.uam.es>
Date: Thu, 21 Mar 2024 13:35:01 +0100
Subject: [PATCH] training code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Reinforce_Learning.py                     | 122 ++++++++++++------
 1 file changed, 80 insertions(+), 42 deletions(-)

diff --git a/Documentos/TFG_Machine_Learning/Reinforce_Learning.py b/Documentos/TFG_Machine_Learning/Reinforce_Learning.py
index d38c9bb..f73da80 100644
--- a/Documentos/TFG_Machine_Learning/Reinforce_Learning.py
+++ b/Documentos/TFG_Machine_Learning/Reinforce_Learning.py
@@ -20,7 +20,7 @@ from wand.image import Image as WImage
 # sns.set(palette="husl",font_scale=1)
 # %config InlineBackend.figure_format = 'retina'
 import copy
-np.random.seed(4032)
+np.random.seed(48632)
 #%load_ext line_profiler
 
 
@@ -40,7 +40,7 @@ N_states = 4 # number of states - one for each coarse-grained degree of vorticit
 N_actions = 2 # number of actions - one for each coarse-grained swimming direction
 
 # numerical parameters
-dt = 0.0001 # timestep size
+#dt = 0.00001 # timestep size
 
 
 
@@ -128,7 +128,7 @@ class Swimmer(Agent):
             # Check whether the initial position lies inside an obstacle
             for i in range(len(self.obstacles)//2):
                 obstacle_position = np.array([self.obstacles[2*i], self.obstacles[2*i+1], 0])
-                if np.linalg.norm(self.X - obstacle_position) < 0.5*self.sigma:
+                if np.linalg.norm(self.X - obstacle_position) < 0.8*self.sigma:
                     valid_initial_position = False
                     break
          
@@ -145,7 +145,7 @@ class Swimmer(Agent):
         
         # distance between the swimmer and the obstacle
         self.R=np.random.uniform(0, 2.5, 1)
-        print(self.R)
+        #print(self.R)
         
         # preferred swimming direction (equal to [1,0], [0,1], [-1,0], or [0,-1])
         self.ka = np.array([0,1])
@@ -211,6 +211,7 @@ class Swimmer(Agent):
 
       # update the spinner's position
       self.X[:-1] += dr
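+      # X_total accumulates the absolute (unwrapped) displacement, while X is wrapped back into the box by periodic_boundaries()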
+      self.X_total[:-1] += dr
       self.U = np.array([dr[0]/dt, dr[1]/dt, 0])
       
       all_dists = np.empty(len(obstacles)//2)
@@ -220,27 +221,47 @@ class Swimmer(Agent):
 
       self.R = np.amin(all_dists)      
       # check that the spinner stays inside the periodic box
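+      # store copies so later in-place position updates do not overwrite the stored history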
+      self.history_X_total.append(self.X_total.copy())
+      self.periodic_boundaries()
+      self.history_X.append(self.X.copy())
       self.check_in_box()
 
 
-    def reinitialize(self):
-        self.X = np.array([np.random.uniform(0, L), np.random.uniform(0, L)])
-        self.X_total = self.X
+    def reinitialize(self, obstacles):
+        # absolute position. -inf. <= x_total < inf. and -inf. <= z_total < inf.
+        self.X = self.history_X[0].copy()
+        self.X_total = self.history_X_total[0].copy()
 
+        # particle orientation
         self.theta = np.random.uniform(0, 2*np.pi) # polar angle theta in the x-z plane
-        self.p = np.array([np.cos(self.theta), np.sin(self.theta)]) # p = [px, pz]^T # orientación del nadador
-
-        self.U = np.zeros(2)
-        self.W = np.array([0, 0, 1])
+        self.p = np.array([np.cos(self.theta), np.sin(self.theta)]) # p = [px, pz]^T
 
+        # translational and rotational velocity
+        self.U = np.zeros(3, float)
+        self.W = np.array([0., 0., 1.]) # angular velocity
+        
+        # distance between the swimmer and the obstacle
+        self.R = np.random.uniform(0, 2.5, 1)
+        #print(self.R)
+        
+        # preferred swimming direction (equal to [1,0], [0,1], [-1,0], or [0,-1])
         self.ka = np.array([0,1])
 
-        self.history_X = [self.X]
-        self.history_X_total = [self.X_total]
+        # history of local and global position. Only store information for this episode.
+        self.history_X.clear()
+        self.history_X_total.clear()
+        self.history_X.append(self.X.copy())
+        self.history_X_total.append(self.X_total.copy())
+
+        # local vorticity at the current location
+        _, _, self.w = tgv(self.X[0], self.X[1])
         
-        self.R=np.random.uniform(0, 2.5)
+
+        # update coarse-grained state
+        self.update_state()
         
         self.t = 0
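+        # keep the obstacle layout passed in from training() for the next episode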
+        self.obstacles = obstacles
 
     def update_kinematics(self, Φ, Ψ, D0 = 0, Dr = 0, int_method = "euler"): # Updates the swimmer's position and orientation using the specified integration method.
         if int_method == "rk45":
@@ -431,7 +452,7 @@ def tgv(x, z):
     w = -np.cos(x)*np.cos(z)
     return ux, uz, w
 
-def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0, n_updates=1000, \
+def training(alpha0, kappa, alphaMAG, beta, gammaYUK, Pe, dt, ni, sigma, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, n_updates=1000, \
              RIC=False, method="Qlearning", lr_decay=None, omega=0.85, eps_decay=False, Qin=None):
     # n_updates - how often to plot the trajectory undertaken by the particle during the learning process
     # Ne - number of episodes
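+    # kappa, alphaMAG, beta, gammaYUK, Pe, dt - obstacle-interaction parameters forwarded to interaction_with_obstacles; ni, sigma - forwarded to the Swimmer constructor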
@@ -467,9 +488,10 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
     state_action_counter = np.zeros((N_states,N_actions))
 
     # initialize a naive and a smart gyrotactic particle
-    naive = Swimmer(Ns)
-    smart = Swimmer(Ns)
-
+    naive = Swimmer(Ns, ni, sigma)
+    smart = Swimmer(Ns, ni, sigma)
+    naive.obstacles = smart.obstacles
+    obstacles = naive.obstacles
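+    # naive and smart share a single obstacle configuration so both particles see the same environment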
     # initialize Q matrix to large value
     if method=="doubleQ":
         Q1 = L*Ns*np.ones((4, 2))
@@ -483,20 +505,21 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
     avg_Q_history = np.zeros((Ne,4,2))
 
     # store initial position and orientation for each episode
-    initial_coords = np.zeros((Ne,3))
+    initial_coords = np.empty([Ne, 3], float)
+    for k in range(Ne):
+        initial_coords[k, :] = smart.X
 
     # iterate over episodes
     k = 0
     for ep in tqdm(range(Ne)):
 
         # assign random orientation and position
-        smart.reinitialize()
-        naive.reinitialize()
+        smart.reinitialize(obstacles)
+        naive.reinitialize(obstacles)
         naive = copy.deepcopy(smart) # have naive and smart share initial conditions for visualization purposes
 
         # store initialization
-        initial_coords[ep,0:2] = smart.X
-        initial_coords[ep,2] = smart.theta
+        initial_coords[ep,0:3] = smart.X
 
         # save selected actions and particle orientation for last episodes
         if ep == Ne - 1:
@@ -525,8 +548,8 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
             old_s = state_lookup_table[smart.my_state]
 
             # given selected action, update the state
-            naive.update_kinematics(Φ, Ψ, D0, Dr)
-            smart.update_kinematics(Φ, Ψ, D0, Dr)
+            naive.interaction_with_obstacles(naive.obstacles, kappa, alphaMAG, beta, gammaYUK, Pe, dt)
+            smart.interaction_with_obstacles(smart.obstacles, kappa, alphaMAG, beta, gammaYUK, Pe, dt)
             smart.update_state()      # only need to update smart particle since naive has ka = [0, 1] always
 
             # calculate reward based on new state
@@ -597,7 +620,7 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
         # save optimal policy
         if ep==Ne-1:
             filename = "Policies/Q_alpha_" + str(alpha).replace(".","d") + "_Ns_" + str(Ns) + "_Ne_" + str(Ne) + \
-                    "_Φ_" + str(Φ).replace(".","d") + "_Ψ_" + str(Ψ).replace(".","d") + "_eps_" \
+                    "_sigma_" + str(sigma).replace(".","d") + "_Pe_" + str(Pe).replace(".","d") + "_eps_" \
                     + str(eff_eps).replace(".","d") + "_epsdecay_" + str(eps_decay)
             if lr_decay: filename = filename + "_omega_" + str(omega)
             if method=="doubleQ": filename = filename + "_" + str(method)
@@ -606,7 +629,7 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
             np.save(filename, Qout)
 
     return Qout, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
-        state_action_counter, chosen_actions, avg_Q_history, initial_coords, theta_history
+        state_action_counter, chosen_actions, avg_Q_history, initial_coords, theta_history, obstacles
 #Plot
 
 Q = np.random.rand(4, 2)
@@ -618,34 +641,49 @@ Q = np.random.rand(4, 2)
 
 print(Q)
 
-Ns = 5000
+Ns = 10000
 spinner = Swimmer(Ns, 1, 1)
 traj = []
 
-obstacles = spinner.generate_obstacles()
-for i in range(Ns):
-  spinner.interaction_with_obstacles(obstacles, 2.5, 1, 1., 2.5e-4, 10000, 0.001)
-  traj.append(spinner.X[0])
-  traj.append(spinner.X[1])
+#obstacles = spinner.generate_obstacles()
+#for i in range(Ns):
+  #spinner.interaction_with_obstacles(obstacles, 2.5, 1, 1., 2.5e-4, 10000, 0.00001)
+  #traj.append(spinner.X[0])
+  #traj.append(spinner.X[1])
   
-  spinner.periodic_boundaries()
+  #spinner.periodic_boundaries()
 
-  action_index = spinner.take_greedy_action(Q)
+  #action_index = spinner.take_greedy_action(Q)
   
 
-  spinner.update_state()
+  #spinner.update_state()
   
  
   
   #print("Mi estado", spinner.my_state)
   #print("Valor de Wz después de tomar la acción:", spinner.W[2])
-
+my_alpha0 = 1.0
+my_eps0 = 1.0
+Ne = 20
+stepsupdate = 2
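+# positional arguments: alpha0, kappa, alphaMAG, beta, gammaYUK, Pe, dt, ni, sigma, Ns, Ne, gamma, eps0, n_updates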
+Q, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
+        state_action_counter, chosen_actions, avg_Q_hist, initial_coords, theta_history, obstacles \
+            = training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1., Ns, Ne, 0.999, 0.0, stepsupdate)
+            
+#print(smart_stored_histories[1][1][3, :])
+#print(len(smart_stored_histories), smart_stored_histories[0].shape)
 fig, ax= plt.subplots(1,1)
-ax.plot(traj[::2], traj[1::2], '.')
-ax.plot(obstacles[::2], obstacles[1::2], '.')
-ax.set_aspect('equal')
-print(obstacles[::2])
-plt.show()
+#ax.plot(np.array(traj[::2]) + L/8., np.array(traj[1::2]) + L/8., '.')
+ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
+
+for i in range(0, Ne, stepsupdate):
+    ax.plot(smart_stored_histories[i][1][:, 0], smart_stored_histories[i][1][:, 1], '.', label='episode %d'%i)
+    if i == Ne-1:
+        ax.plot(naive_stored_histories[i][1][:, 0], naive_stored_histories[i][1][:, 1], '.', label='naive spinner')
 
 
+ax.set_aspect('equal')
+#ax.legend()
+plt.show()
+