training code

main
marta 12 months ago
parent cf3c455059
commit 59e2b0291c
      Documentos/TFG_Machine_Learning/Reinforce_Learning.py

@@ -258,7 +258,7 @@ class Swimmer(Agent):
         # update coarse-grained state
-        self.update_state()
+        #self.update_state()
         self.t = 0
         self.obstacles = obstacles
@@ -411,9 +411,9 @@ class Swimmer(Agent):
         action_index = np.argmax(Q[state_index]) # find largest entry in this row of Q (i.e. this state)
         Wc=0.175*self.ni/(.5*self.sigma*self.sigma)
         if action_index == 0: # increase by 1/8 W
-            self.W[2] += 1./8*Wc
+            self.W[2] += .001/8*Wc
         elif action_index == 1: # decrease by 1/8 W
-            self.W[2] -= 1./8*Wc
+            self.W[2] -= .001/8*Wc
         else:
             raise Exception ("Action index out of bounds: ", action_index)
         return action_index
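
This hunk rescales the spinning-rate increment applied per greedy action from 1/8 of Wc to 0.001/8 of Wc. A minimal standalone sketch of the same greedy step, assuming the names Q, state_index, W, ni and sigma mirror the attributes used in the diff (the surrounding Swimmer class is not reproduced here):

import numpy as np

def greedy_action(Q, state_index, W, ni, sigma):
    # characteristic rotation rate used to scale the action, as in the diff
    Wc = 0.175 * ni / (0.5 * sigma * sigma)
    action_index = np.argmax(Q[state_index])  # largest entry in this row of Q
    if action_index == 0:         # increase the spinning rate
        W[2] += 0.001 / 8 * Wc    # step is now 0.001/8 of Wc instead of 1/8
    elif action_index == 1:       # decrease the spinning rate
        W[2] -= 0.001 / 8 * Wc
    else:
        raise Exception("Action index out of bounds: ", action_index)
    return action_index

The same rescaling is applied to the exploratory (random) action in the next hunk, so greedy and random actions keep identical step sizes.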
@@ -422,9 +422,9 @@ class Swimmer(Agent):
         action_index = np.random.randint(0, 2, 1)
         Wc=0.175*self.ni/(.5*self.sigma*self.sigma)
         if action_index == 0: # increase by 1/8 W
-            self.W[2] += 1./8*Wc
+            self.W[2] += .001/8*Wc
         else: # decrease by 1/8 W
-            self.W[2] -= 1./8*Wc
+            self.W[2] -= .001/8*Wc
         return action_index
     def periodic_boundaries(self, isxperiodic=True, isyperiodic=True, iszperiodic=True):
@@ -452,7 +452,7 @@ def tgv(x, z):
     w = -np.cos(x)*np.cos(z)
     return ux, uz, w
-def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, n_updates=1000, \
+def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, Naction=100, gamma=0.999, eps0=0.0, n_updates=1000, \
              RIC=False, method="Qlearning", lr_decay=None, omega=0.85, eps_decay=False, Qin=None):
     # n_updates - how often to plot the trajectory undertaken by the particle during the learning process
     # Ne - number of episodes
@@ -548,9 +548,11 @@ def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5
            old_s = state_lookup_table[smart.my_state]
            # given selected action, update the state
-           naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
-           smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
+           for step in range(Naction):
+               naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
+               smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
            smart.update_state() # only need to update smart particle since naive has ka = [0, 1] always
+           print(ep, smart.R, smart.W[2])
            # calculate reward based on new state
            naive.calc_reward(stage)
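
The effect of this hunk is that each chosen action is now held fixed while the dynamics are integrated for Naction substeps of size dt, and only afterwards is the coarse-grained state refreshed and the reward computed. A small self-contained sketch of that pattern, where the advance() function is a hypothetical stand-in for interaction_with_obstacles():

import numpy as np

def advance(x, dt):
    # hypothetical one-substep integrator standing in for interaction_with_obstacles()
    return x + dt * np.cos(x)

def take_action_block(x, dt=1e-5, Naction=100):
    # hold the current action fixed for Naction integration substeps
    for _ in range(Naction):
        x = advance(x, dt)
    return x  # state update and reward happen only after the whole block

print(take_action_block(0.5))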
@@ -641,7 +643,7 @@ Q = np.random.rand(4, 2)
 print(Q)
-Ns = 10000
+Ns = 100
 spinner = Swimmer(Ns, 1, 1)
 traj = []
@@ -665,16 +667,17 @@ traj = []
 my_alpha0 = 1.0
 my_eps0 = 1.0
 Ne=20
+naction=100
 stepsupdate = 2
 Q, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
 state_action_counter, chosen_actions, avg_Q_hist, initial_coords, theta_history, obstacles \
-= training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, 0.999, 0.0, stepsupdate)
+= training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, naction, 0.999, 0.0, stepsupdate)
 #print(smart_stored_histories[1][1][3, :])
 #print(len(smart_stored_histories), smart_stored_histories[0].shape)
 fig, ax= plt.subplots(1,1)
 #ax.plot(np.array(traj[::2]) + L/8., np.array(traj[1::2]) + L/8., '.')
-ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
+#ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
 for i in range(0, stepsupdate, Ne):
     ax.plot(smart_stored_histories[i][1][:, 0], smart_stored_histories[i][1][:, 1], '.', label='episode %d'%i)
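
Because the new Naction parameter sits between Ne and gamma in the training() signature, the later positional arguments shift by one; a keyword-style version of the same call (identical values, shown only as an illustration) makes the mapping explicit and is less fragile:

Q, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
state_action_counter, chosen_actions, avg_Q_hist, initial_coords, theta_history, obstacles \
    = training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,
               Ns=Ns, Ne=Ne, Naction=naction, gamma=0.999, eps0=0.0, n_updates=stepsupdate)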
