@@ -258,7 +258,7 @@ class Swimmer(Agent):
 
 
         # update coarse-grained state
-        self.update_state()
+        #self.update_state()
 
         self.t = 0
         self.obstacles = obstacles
@@ -411,9 +411,9 @@ class Swimmer(Agent):
         action_index = np.argmax(Q[state_index]) # find largest entry in this row of Q (i.e. this state)
         Wc=0.175*self.ni/(.5*self.sigma*self.sigma)
         if action_index == 0: # increase by 1/8 W
-            self.W[2] += 1./8*Wc
+            self.W[2] += .001/8*Wc
         elif action_index == 1: # decrease by 1/8 W
-            self.W[2] -= 1./8*Wc
+            self.W[2] -= .001/8*Wc
         else:
             raise Exception ("Action index out of bounds: ", action_index)
         return action_index
@@ -422,9 +422,9 @@ class Swimmer(Agent):
         action_index = np.random.randint(0, 2, 1)
         Wc=0.175*self.ni/(.5*self.sigma*self.sigma)
         if action_index == 0: # increase by 1/8 W
-            self.W[2] += 1./8*Wc
+            self.W[2] += .001/8*Wc
         else: # decrease by 1/8 W
-            self.W[2] -= 1./8*Wc
+            self.W[2] -= .001/8*Wc
         return action_index
 
     def periodic_boundaries(self, isxperiodic=True, isyperiodic=True, iszperiodic=True):
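The two hunks above shrink the torque increment applied per action from Wc/8 to .001/8*Wc in both the greedy selector (argmax over the state's row of Q) and the random selector. For reference, a minimal sketch of how a greedy and a random choice are commonly combined into an epsilon-greedy policy; the standalone function below is an illustration under that assumption, not code from this repository:

import numpy as np

def epsilon_greedy_action(Q, state_index, eps, rng=None):
    # Sketch only: explore with probability eps, otherwise exploit the
    # largest entry in this state's row of Q, mirroring the argmax above.
    rng = rng or np.random.default_rng()
    if rng.random() < eps:
        return int(rng.integers(Q.shape[1]))   # random action (exploration)
    return int(np.argmax(Q[state_index]))      # greedy action (exploitation)

# Example with the same 4-state, 2-action table shape used later in this diff:
Q = np.random.rand(4, 2)
print(epsilon_greedy_action(Q, state_index=2, eps=0.1))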
@@ -452,7 +452,7 @@ def tgv(x, z):
     w = -np.cos(x)*np.cos(z)
     return ux, uz, w
 
-def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, gamma=0.999, eps0=0.0, n_updates=1000, \
+def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5000, Naction=100, gamma=0.999, eps0=0.0, n_updates=1000, \
             RIC=False, method="Qlearning", lr_decay=None, omega=0.85, eps_decay=False, Qin=None):
     # n_updates - how often to plot the trajectory undertaken by the particle during the learning process
     # Ne - number of episodes
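Because Naction=100 is inserted between Ne and gamma in a long list of defaulted parameters, existing positional calls to training() shift by one argument (the call site in the last hunk of this diff is updated for exactly this reason). Below is a hedged sketch of the equivalent keyword-style call, using the values that appear later in this diff; the keyword form is only a suggestion, not code from the repository, and the unpacking of the returned tuple is omitted:

# Sketch only: pass the newly inserted and trailing parameters by keyword so
# the call no longer depends on their position in the signature.
results = training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,
                   Ns=Ns, Ne=Ne, Naction=naction, gamma=0.999, eps0=0.0,
                   n_updates=stepsupdate)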
@@ -548,9 +548,11 @@ def training(alpha0,kappa,alphaMAG,beta,gammaYUK,Pe,dt, ni, sigma, Ns=4000, Ne=5
             old_s = state_lookup_table[smart.my_state]
 
             # given selected action, update the state
-            naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
-            smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
+            for step in range(Naction):
+                naive.interaction_with_obstacles(naive.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
+                smart.interaction_with_obstacles(smart.obstacles, kappa,alphaMAG,beta,gammaYUK,Pe,dt)
             smart.update_state() # only need to update smart particle since naive has ka = [0, 1] always
+            print(ep, smart.R, smart.W[2])
 
             # calculate reward based on new state
             naive.calc_reward(stage)
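The hunk above wraps the obstacle-interaction update in a for step in range(Naction) loop, so each learning action is now held fixed for Naction integration sub-steps of size dt instead of a single one, and the added print traces the episode, reward, and current W[2]. A minimal, self-contained sketch of this decision-interval versus integration-step decoupling; the scalar toy dynamics below is an illustration only, not the swimmer model:

def hold_action(x, velocity, Naction, dt):
    # Sketch only: the chosen action (here a constant velocity) is held for
    # Naction integration sub-steps, so one decision spans a time Naction*dt.
    for _ in range(Naction):
        x += velocity * dt
    return x

# With Naction=100 and dt=0.00001 (the values used in this diff), one learning
# action covers a physical time of about 0.001.
print(hold_action(0.0, 1.0, Naction=100, dt=0.00001))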
@@ -641,7 +643,7 @@ Q = np.random.rand(4, 2)
 
 print(Q)
 
-Ns = 10000
+Ns = 100
 spinner = Swimmer(Ns, 1, 1)
 traj = []
 
@@ -665,16 +667,17 @@ traj = []
 my_alpha0 = 1.0
 my_eps0 = 1.0
 Ne=20
+naction=100
 stepsupdate = 2
 Q, Σ, smart, naive, hist_R_tot_smart, hist_R_tot_naive, smart_stored_histories, naive_stored_histories, \
 state_action_counter, chosen_actions, avg_Q_hist, initial_coords, theta_history, obstacles \
-    = training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, 0.999, 0.0, stepsupdate)
+    = training(my_alpha0, 2.5, 1, 1., 2.5e-4, 10000, 0.00001, 1., 1.,Ns, Ne, naction, 0.999, 0.0, stepsupdate)
 
 #print(smart_stored_histories[1][1][3, :])
 #print(len(smart_stored_histories), smart_stored_histories[0].shape)
 fig, ax= plt.subplots(1,1)
 #ax.plot(np.array(traj[::2]) + L/8., np.array(traj[1::2]) + L/8., '.')
-ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
+#ax.plot(np.array(obstacles[::2]) + L/8., np.array(obstacles[1::2]) + L/8., '.')
 
 for i in range(0, stepsupdate, Ne):
     ax.plot(smart_stored_histories[i][1][:, 0], smart_stored_histories[i][1][:, 1], '.', label='episode %d'%i)
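One caution about the unchanged plotting loop at the end of this hunk: range(start, stop, step) takes the stop value as its second argument, so range(0, stepsupdate, Ne) with stepsupdate = 2 and Ne = 20 yields only [0]. If the intent is to plot every stepsupdate-th episode up to Ne, a hedged sketch (assuming smart_stored_histories can be indexed by those episode numbers) would be:

# Sketch only: iterate episodes 0, stepsupdate, 2*stepsupdate, ... below Ne.
for i in range(0, Ne, stepsupdate):
    ax.plot(smart_stored_histories[i][1][:, 0],
            smart_stored_histories[i][1][:, 1], '.', label='episode %d' % i)
ax.legend()
plt.show()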