diff --git a/Documentos/TFG_Machine_Learning/Reinforce_Learning.py b/Documentos/TFG_Machine_Learning/Reinforce_Learning.py
index f73da80..c343663 100644
--- a/Documentos/TFG_Machine_Learning/Reinforce_Learning.py
+++ b/Documentos/TFG_Machine_Learning/Reinforce_Learning.py
@@ -258,7 +258,7 @@ class Swimmer(Agent):
 
         # update coarse-grained state
-        self.update_state()
+        #self.update_state()
 
         self.t = 0
         self.obstacles = obstacles
 
@@ -411,9 +411,9 @@ class Swimmer(Agent):
         action_index = np.argmax(Q[state_index]) # find largest entry in this row of Q (i.e. this state)
         Wc=0.175*self.ni/(.5*self.sigma*self.sigma)
         if action_index == 0: # aumenta 1/8W
-            self.W[2] += 1./8*Wc
+            self.W[2] += .001/8*Wc
         elif action_index == 1: # disminuye 1/8W
-            self.W[2] -= 1./8*Wc
+            self.W[2] -= .001/8*Wc
         else:
             raise Exception ("Action index out of bounds: ", action_index)
         return action_index
@@ -422,9 +422,9 @@ class Swimmer(Agent):
         action_index = np.random.randint(0, 2, 1)
         Wc=0.175*self.ni/(.5*self.sigma*self.sigma)
         if action_index == 0: # aumenta 1/8W
-            self.W[2] += 1./8*Wc
+            self.W[2] += .001/8*Wc
         else: # disminuye 1/8W
-            self.W[2] -= 1./8*Wc
+            self.W[2] -= .001/8*Wc
         return action_index
 
     def periodic_boundaries(self, isxperiodic=True, isyperiodic=True, iszperiodic=True):
@@ -452,7 +452,7 @@ def tgv(x, z):
     w = -np.cos(x)*np.cos(z)
     return ux, uz, w
 
-def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, n_updates=1000, \
+def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, Naction=100, gamma=0.999, eps0=0.0, n_updates=1000, \
              RIC=False, method="Qlearning", lr_decay=None, omega=0.85, eps_decay=False, Qin=None):
     # n_updates - how often to plot the trajectory undertaken by the particle during the learning process
     # Ne - number of episodes
@@ -548,9 +548,11 @@ def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5
             old_s = state_lookup_table[smart.my_state]
 
             # given selected action, update the state
-            naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
-            smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
+            for step in range(Naction):
+                naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
+                smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
             smart.update_state() # only need to update smart particle since naive has ka = [0, 1] always
+            print(ep, smart.R, smart.W[2])
 
             # calculate reward based on new state
             naive.calc_reward(stage)
@@ -641,7 +643,7 @@ Q = np.random.rand(4, 2)
 print(Q)
 
-Ns = 10000
+Ns = 100
 
 spinner = Swimmer(Ns, 1, 1)
 
 traj = []
@@ -665,16 +667,17 @@ traj = []
 my_alpha0 = 1.0
 my_eps0 = 1.0
 Ne=20
+naction=100
 stepsupdate = 2
 Q, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
     state_action_counter, chosen_actions, avg_Q_hist, initial_coords, theta_history, obstacles \
-    = training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, 0.999, 0.0, stepsupdate)
+    = training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, naction, 0.999, 0.0, stepsupdate)
 
 #print(smart_stored_histories[1][1][3, :])
 #print(len(smart_stored_histories), smart_stored_histories[0].shape)
 fig, ax= plt.subplots(1,1)
 #ax.plot(np.array(traj[::2]) + L/8., np.array(traj[1::2]) + L/8., '.')
-ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
+#ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
 
 for i in range(0, stepsupdate, Ne):
     ax.plot(smart_stored_histories[i][1][:, 0], smart_stored_histories[i][1][:, 1], '.', label='episode %d'%i)
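
For orientation, below is a minimal, self-contained sketch of the action/update cycle these hunks set up: each greedy action nudges the spinning rate W[2] by 0.001/8 of the characteristic rate Wc = 0.175*ni/(0.5*sigma^2), and the dynamics are then advanced for Naction sub-steps before the coarse-grained state is refreshed. The helpers step_dynamics() and coarse_state() are hypothetical stand-ins for interaction_with_obstacles() and update_state(); the numerical values follow the diff.

import numpy as np

ni, sigma = 1.0, 1.0
Wc = 0.175 * ni / (0.5 * sigma * sigma)   # characteristic spinning rate, as in choose_action
dW = 0.001 / 8 * Wc                       # per-action increment after this change (was Wc/8)
Naction = 100                             # dynamics sub-steps per learning update

W2 = 0.0                                  # the controlled component W[2]
Q = np.random.rand(4, 2)                  # 4 coarse states x 2 actions, as in the script

def step_dynamics(w2, dt=1e-5):
    # hypothetical stand-in for interaction_with_obstacles(): advance one time step
    return w2

def coarse_state(w2):
    # hypothetical stand-in for update_state(): map W[2] to one of 4 coarse states
    return int(np.clip(np.floor(w2 / Wc + 2.0), 0, 3))

state = coarse_state(W2)
for update in range(10):                  # a few learning updates
    action = np.argmax(Q[state])          # greedy choice over this state's row of Q
    W2 += dW if action == 0 else -dW      # action 0 increases W[2], action 1 decreases it
    for _ in range(Naction):              # Naction integration steps per chosen action
        W2 = step_dynamics(W2)
    state = coarse_state(W2)              # re-coarse-grain only after the sub-steps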