Commit 29fcf1f9 authored by holgadoa

removing softmax and bug

parent b173b190
@@ -146,7 +146,7 @@ if __name__ == "__main__":
                     torch.save(net_act.state_dict(), fname)
                 best_reward = rewards
-        trajectory.append(exp)
         trajectory.append(exp)
         if len(trajectory) < TRAJECTORY_SIZE:
@@ -159,7 +159,7 @@ if __name__ == "__main__":
         traj_actions_v = torch.FloatTensor(traj_actions).to(device)
         traj_adv_v, traj_ref_v = calc_adv_ref(trajectory, net_crt, traj_states_v, device=device)
         mu_v = net_act(traj_states_v)
-        mu_v = F.log_softmax(mu_v, dim=1)
+        # mu_v = F.log_softmax(mu_v, dim=1)
         old_logprob_v = calc_logprob(mu_v, net_act.logstd, traj_actions_v)
         # normalize advantages
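Note on the change: the first hunk appears to drop a duplicated trajectory.append(exp) so each experience enters the trajectory only once, and the second hunk stops passing mu_v through F.log_softmax. Since net_act carries a learned logstd and the stored actions are continuous (FloatTensor), the actor seems to output the mean of a Gaussian policy rather than logits over discrete actions, so log-softmaxing mu_v before calc_logprob would distort the old log-probabilities. Below is a minimal sketch of what a diagonal-Gaussian log-probability helper such as calc_logprob typically computes; it is an illustration under that assumption, and the repository's actual helper may differ.

import math
import torch

def calc_logprob(mu_v, logstd_v, actions_v):
    # Per-dimension log-density of a diagonal Gaussian N(mu, exp(logstd)^2)
    # evaluated at actions_v; mu_v is the raw network mean, not log-softmaxed.
    var = torch.exp(logstd_v) ** 2
    p1 = -((actions_v - mu_v) ** 2) / (2 * var.clamp(min=1e-3))
    p2 = -torch.log(torch.sqrt(2 * math.pi * var))
    return p1 + p2

With mu_v taken directly from net_act(traj_states_v), old_logprob_v then holds the log-probabilities of the stored actions under the policy that generated the trajectory, which is what the rest of the training loop consumes.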