training code

main
marta 12 months ago
parent be48f30401
commit cf3c455059
      Documentos/TFG_Machine_Learning/Reinforce_Learning.py

@@ -20,7 +20,7 @@ from wand.image import Image as WImage
# sns.set(palette="husl",font_scale=1)
# %config InlineBackend.figure_format = 'retina'
import copy
np.random.seed(4032)
np.random.seed(48632)
#%load_ext line_profiler
@@ -40,7 +40,7 @@ N_states = 4 # number of states - one for each coarse-grained degree of vorticity
N_actions = 2 # number of actions - one for each coarse-grained swimming direction
# numerical parameters
dt = 0.0001 # timestep size
#dt = 0.00001 # timestep size
@@ -128,7 +128,7 @@ class Swimmer(Agent):
# Check whether the particle is initially inside an obstacle
for i in range(len(self.obstacles)//2):
obstacle_position = np.array([self.obstacles[2*i], self.obstacles[2*i+1], 0])
if np.linalg.norm(self.X - obstacle_position) < 0.5*self.sigma:
if np.linalg.norm(self.X - obstacle_position) < 0.8*self.sigma:
valid_initial_position = False
break
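# (Rejection sampling: any candidate position within 0.8*sigma of an obstacle
# centre is flagged invalid; the enclosing loop, not visible in this hunk,
# presumably draws a new position.)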
@@ -145,7 +145,7 @@ class Swimmer(Agent):
# distance between the swimmer and the obstacle
self.R=np.random.uniform(0, 2.5, 1)
print(self.R)
#print(self.R)
# preferred swimming direction (equal to [1,0], [0,1], [-1,0], or [0,-1])
self.ka = np.array([0,1])
@@ -211,6 +211,7 @@ class Swimmer(Agent):
# update the spinner position
self.X[:-1] += dr
self.X_total[:-1] += dr
self.U = np.array([dr[0]/dt, dr[1]/dt, 0])
all_dists = np.empty(len(obstacles)//2)
@@ -220,27 +221,47 @@ class Swimmer(Agent):
self.R = np.amin(all_dists)
# check that the spinner is still inside the periodic box
self.history_X_total.append(self.X_total)
self.periodic_boundaries()
self.history_X.append(self.X)
self.check_in_box()
def reinitialize(self):
self.X = np.array([np.random.uniform(0, L), np.random.uniform(0, L)])
self.X_total = self.X
def reinitialize(self, obstacles):
# absolute position. -inf. <= x_total < inf. and -inf. <= z_total < inf.
self.X = self.history_X[0]
self.X_total = self.history_X_total[0]
# particle orientation
self.theta = np.random.uniform(0, 2*np.pi) # polar angle theta in the x-z plane
self.p = np.array([np.cos(self.theta), np.sin(self.theta)]) # p = [px, pz]^T # swimmer orientation
self.U = np.zeros(2)
self.W = np.array([0, 0, 1])
self.p = np.array([np.cos(self.theta), np.sin(self.theta)]) # p = [px, pz]^T
# translational and rotational velocity
self.U = np.zeros(3, float)
self.W = np.array([0., 0., 1.]) # random angular velocity
# distance between the swimmer and the obstacle
self.R=np.random.uniform(0, 2.5, 1)
#print(self.R)
# preferred swimming direction (equal to [1,0], [0,1], [-1,0], or [0,-1])
self.ka = np.array([0,1])
self.history_X = [self.X]
self.history_X_total = [self.X_total]
# history of local and global position. Only store information for this episode.
self.history_X.clear()
self.history_X_total.clear()
self.history_X.append(self.X)
self.history_X_total.append(self.X_total)
# local vorticity at the current location
_, _, self.w = tgv(self.X[0], self.X[1])
self.R=np.random.uniform(0, 2.5)
# update coarse-grained state
self.update_state()
self.t = 0
self.obstacles = obstacles
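# Note: compared with the old reinitialize(), the particle is now reset to the
# first recorded position of the previous episode (history_X[0]) instead of a
# fresh uniform draw in [0, L), the episode histories are cleared, and the
# obstacle list is passed in explicitly.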
def update_kinematics(self, Φ, Ψ, D0 = 0, Dr = 0, int_method = "euler"): # Updates the swimmer's position and orientation using the specified integration method.
if int_method == "rk45":
@@ -431,7 +452,7 @@ def tgv(x, z):
w = -np.cos(x)*np.cos(z)
return ux, uz, w
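# Hedged sketch (not part of this commit): one incompressible velocity field
# whose y-vorticity matches the line above, w = -cos(x)*cos(z). The actual
# ux, uz of the repository lie outside this hunk, so the prefactors below are
# an assumption, chosen only so that d(ux)/dz - d(uz)/dx = w and div(u) = 0.
def _tgv_sketch(x, z):
    import numpy as np
    ux = -0.5*np.cos(x)*np.sin(z)   # d(ux)/dz = -0.5*cos(x)*cos(z)
    uz =  0.5*np.sin(x)*np.cos(z)   # d(uz)/dx =  0.5*cos(x)*cos(z)
    w = -np.cos(x)*np.cos(z)        # w = d(ux)/dz - d(uz)/dx
    return ux, uz, w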
def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0, n_updates=1000, \
def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, n_updates=1000, \
RIC=False, method="Qlearning", lr_decay=None, omega=0.85, eps_decay=False, Qin=None):
# n_updates - how often to plot the trajectory undertaken by the particle during the learning process
# Ne - number of episodes
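# New signature parameters, as used later in this diff:
#   kappa, alphaMAG, beta, gammaYUK, Pe, dt -- forwarded each step to
#       Swimmer.interaction_with_obstacles()
#   ni, sigma -- forwarded to the Swimmer(Ns, ni, sigma) constructor
#   (their physical meaning is defined elsewhere in the file, not in this hunk)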
@@ -467,9 +488,10 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
state_action_counter = np.zeros((N_states,N_actions))
# initialize a naive and a smart gyrotactic particle
naive = Swimmer(Ns)
smart = Swimmer(Ns)
naive = Swimmer(Ns, ni, sigma)
smart = Swimmer(Ns, ni, sigma)
naive.obstacles = smart.obstacles
obstacles=naive.obstacles
# initialize Q matrix to large value
if method=="doubleQ":
Q1 = L*Ns*np.ones((4, 2))
@@ -483,20 +505,21 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
avg_Q_history = np.zeros((Ne,4,2))
# store initial position and orientation for each episode
initial_coords = np.zeros((Ne,3))
initial_coords = np.empty([Ne, 3], float)
for k in range(Ne):
initial_coords[k,:]=smart.X
# iterate over episodes
k = 0
for ep in tqdm(range(Ne)):
# assign random orientation and position
smart.reinitialize()
naive.reinitialize()
smart.reinitialize(obstacles)
naive.reinitialize(obstacles)
naive = copy.deepcopy(smart) # have naive and smart share initial conditions for visualization purposes
# store initialization
initial_coords[ep,0:2] = smart.X
initial_coords[ep,2] = smart.theta
initial_coords[ep,0:3] = smart.X
# save selected actions and particle orientation for last episodes
if ep == Ne - 1:
@@ -525,8 +548,8 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
old_s = state_lookup_table[smart.my_state]
# given selected action, update the state
naive.update_kinematics(Φ, Ψ, D0, Dr)
smart.update_kinematics(Φ, Ψ, D0, Dr)
naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
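# Both the naive and the smart particle now interact with the same obstacle
# set every step, using the interaction parameters passed into training().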
smart.update_state() # only need to update smart particle since naive has ka = [0, 1] always
# calculate reward based on new state
@@ -597,7 +620,7 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
# save optimal policy
if ep==Ne-1:
filename = "Policies/Q_alpha_" + str(alpha).replace(".","d") + "_Ns_" + str(Ns) + "_Ne_" + str(Ne) + \
"_Φ_" + str(Φ).replace(".","d") + "_Ψ_" + str(Ψ).replace(".","d") + "_eps_" \
"_sigma_" + str(sigma).replace(".","d") + "_Pe_" + str(Pe).replace(".","d") + "_eps_" \
+ str(eff_eps).replace(".","d") + "_epsdecay_" + str(eps_decay)
if lr_decay: filename = filename + "_omega_" + str(omega)
if method=="doubleQ": filename = filename + "_" + str(method)
@@ -606,7 +629,7 @@ def training(alpha0, Φ, Ψ, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, D0=0, Dr=0
np.save(filename, Qout)
return Qout, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
state_action_counter, chosen_actions, avg_Q_history, initial_coords, theta_history
state_action_counter, chosen_actions, avg_Q_history, initial_coords, theta_history, obstacles
#Plot
Q = np.random.rand(4, 2)
@@ -618,34 +641,49 @@ Q = np.random.rand(4, 2)
print(Q)
Ns = 5000
Ns = 10000
spinner = Swimmer(Ns, 1, 1)
traj = []
obstacles = spinner.generate_obstacles()
for i in range(Ns):
spinner.interaction_with_obstacles(obstacles, 2.5, 1, 1., 2.5e-4, 10000, 0.001)
traj.append(spinner.X[0])
traj.append(spinner.X[1])
#obstacles = spinner.generate_obstacles()
#for i in range(Ns):
#spinner.interaction_with_obstacles(obstacles, 2.5, 1, 1., 2.5e-4, 10000, 0.00001)
#traj.append(spinner.X[0])
#traj.append(spinner.X[1])
spinner.periodic_boundaries()
#spinner.periodic_boundaries()
action_index = spinner.take_greedy_action(Q)
#action_index = spinner.take_greedy_action(Q)
spinner.update_state()
#spinner.update_state()
#print("Mi estado", spinner.my_state)
#print("Valor de Wz después de tomar la acción:", spinner.W[2])
my_alpha0 = 1.0
my_eps0 = 1.0
Ne=20
stepsupdate = 2
Q, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
state_action_counter, chosen_actions, avg_Q_hist, initial_coords, theta_history, obstacles \
= training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, 0.999, 0.0, stepsupdate)
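# The positional arguments map onto the new signature as: alpha0=my_alpha0,
# kappa=2.5, alphaMAG=1, beta=1., gammaYUK=2.5e-4, Pe=10000, dt=0.00001,
# ni=1., sigma=1., followed by Ns, Ne, gamma=0.999, eps0=0.0,
# n_updates=stepsupdate.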
#print(smart_stored_histories[1][1][3, :])
#print(len(smart_stored_histories), smart_stored_histories[0].shape)
fig, ax= plt.subplots(1,1)
ax.plot(traj[::2], traj[1::2], '.')
ax.plot(obstacles[::2], obstacles[1::2], '.')
ax.set_aspect('equal')
print(obstacles[::2])
plt.show()
#ax.plot(np.array(traj[::2]) + L/8., np.array(traj[1::2]) + L/8., '.')
ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
for i in range(0, Ne, stepsupdate):
ax.plot(smart_stored_histories[i][1][:, 0], smart_stored_histories[i][1][:, 1], '.', label='episode %d'%i)
if i == Ne-1:
ax.plot(naive_stored_histories[i][1][:, 0], naive_stored_histories[i][1][:, 1], '.', label='naive spinner')
ax.set_aspect('equal')
#ax.legend()
plt.show()
