training code

main
marta 12 months ago
parent cf3c455059
commit 59e2b0291c
      Documentos/TFG_Machine_Learning/Reinforce_Learning.py

@@ -258,7 +258,7 @@ class Swimmer(Agent):
         # update coarse-grained state
-        self.update_state()
+        #self.update_state()
         self.t = 0
         self.obstacles = obstacles
@@ -411,9 +411,9 @@ class Swimmer(Agent):
         action_index = np.argmax(Q[state_index]) # find largest entry in this row of Q (i.e. this state)
         Wc=0.175*self.ni/(.5*self.sigma*self.sigma)
         if action_index == 0: # increase by 1/8 W
-            self.W[2] += 1./8*Wc
+            self.W[2] += .001/8*Wc
         elif action_index == 1: # decrease by 1/8 W
-            self.W[2] -= 1./8*Wc
+            self.W[2] -= .001/8*Wc
         else:
             raise Exception ("Action index out of bounds: ", action_index)
         return action_index
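
This hunk rescales the spinning-rate increment applied per greedy action from 1/8 of Wc to 0.001/8 of Wc. A minimal standalone sketch of the same greedy step, assuming the names Q, state_index, W, ni and sigma mirror the attributes used in the diff (the surrounding Swimmer class is not reproduced here):

import numpy as np

def greedy_action(Q, state_index, W, ni, sigma):
    # characteristic rotation rate used to scale the action, as in the diff
    Wc = 0.175 * ni / (0.5 * sigma * sigma)
    action_index = np.argmax(Q[state_index])  # largest entry in this row of Q
    if action_index == 0:         # increase the spinning rate
        W[2] += 0.001 / 8 * Wc    # step is now 0.001/8 of Wc instead of 1/8
    elif action_index == 1:       # decrease the spinning rate
        W[2] -= 0.001 / 8 * Wc
    else:
        raise Exception("Action index out of bounds: ", action_index)
    return action_index

The same rescaling is applied to the exploratory (random) action in the next hunk, so greedy and random actions keep identical step sizes.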
@@ -422,9 +422,9 @@ class Swimmer(Agent):
         action_index = np.random.randint(0, 2, 1)
         Wc=0.175*self.ni/(.5*self.sigma*self.sigma)
         if action_index == 0: # increase by 1/8 W
-            self.W[2] += 1./8*Wc
+            self.W[2] += .001/8*Wc
         else: # decrease by 1/8 W
-            self.W[2] -= 1./8*Wc
+            self.W[2] -= .001/8*Wc
         return action_index
     def periodic_boundaries(self, isxperiodic=True, isyperiodic=True, iszperiodic=True):
@@ -452,7 +452,7 @@ def tgv(x, z):
     w = -np.cos(x)*np.cos(z)
     return ux, uz, w
-def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, n_updates=1000, \
+def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, Naction=100, gamma=0.999, eps0=0.0, n_updates=1000, \
              RIC=False, method="Qlearning", lr_decay=None, omega=0.85, eps_decay=False, Qin=None):
     # n_updates - how often to plot the trajectory undertaken by the particle during the learning process
     # Ne - number of episodes
@@ -548,9 +548,11 @@ def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5
            old_s = state_lookup_table[smart.my_state]
            # given selected action, update the state
-           naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
-           smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
+           for step in range(Naction):
+               naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
+               smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
            smart.update_state() # only need to update smart particle since naive has ka = [0, 1] always
+           print(ep, smart.R, smart.W[2])
            # calculate reward based on new state
            naive.calc_reward(stage)
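
The effect of this hunk is that each chosen action is now held fixed while the dynamics are integrated for Naction substeps of size dt, and only afterwards is the coarse-grained state refreshed and the reward computed. A small self-contained sketch of that pattern, where the advance() function is a hypothetical stand-in for interaction_with_obstacles():

import numpy as np

def advance(x, dt):
    # hypothetical one-substep integrator standing in for interaction_with_obstacles()
    return x + dt * np.cos(x)

def take_action_block(x, dt=1e-5, Naction=100):
    # hold the current action fixed for Naction integration substeps
    for _ in range(Naction):
        x = advance(x, dt)
    return x  # state update and reward happen only after the whole block

print(take_action_block(0.5))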
@@ -641,7 +643,7 @@ Q = np.random.rand(4, 2)
 print(Q)
-Ns = 10000
+Ns = 100
 spinner = Swimmer(Ns, 1, 1)
 traj = []
@@ -665,16 +667,17 @@ traj = []
 my_alpha0 = 1.0
 my_eps0 = 1.0
 Ne=20
+naction=100
 stepsupdate = 2
 Q, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
 state_action_counter, chosen_actions, avg_Q_hist, initial_coords, theta_history, obstacles \
-= training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, 0.999, 0.0, stepsupdate)
+= training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, naction, 0.999, 0.0, stepsupdate)
 #print(smart_stored_histories[1][1][3, :])
 #print(len(smart_stored_histories), smart_stored_histories[0].shape)
 fig, ax= plt.subplots(1,1)
 #ax.plot(np.array(traj[::2]) + L/8., np.array(traj[1::2]) + L/8., '.')
-ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
+#ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
 for i in range(0, stepsupdate, Ne):
     ax.plot(smart_stored_histories[i][1][:, 0], smart_stored_histories[i][1][:, 1], '.', label='episode %d'%i)
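
Because the new Naction parameter sits between Ne and gamma in the training() signature, the later positional arguments shift by one; a keyword-style version of the same call (identical values, shown only as an illustration) makes the mapping explicit and is less fragile:

Q, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
state_action_counter, chosen_actions, avg_Q_hist, initial_coords, theta_history, obstacles \
    = training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,
               Ns=Ns, Ne=Ne, Naction=naction, gamma=0.999, eps0=0.0, n_updates=stepsupdate)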
