Commit 29fcf1f9 authored by holgadoa

removing softmax and bug

parent b173b190
......@@ -146,7 +146,7 @@ if __name__ == "__main__":
torch.save(net_act.state_dict(), fname)
best_reward = rewards
trajectory.append(exp)
trajectory.append(exp)
if len(trajectory) < TRAJECTORY_SIZE:
......@@ -159,7 +159,7 @@ if __name__ == "__main__":
traj_actions_v = torch.FloatTensor(traj_actions).to(device)
traj_adv_v, traj_ref_v = calc_adv_ref(trajectory, net_crt, traj_states_v, device=device)
mu_v = net_act(traj_states_v)
mu_v = F.log_softmax(mu_v, dim=1)
# mu_v = F.log_softmax(mu_v, dim=1)
old_logprob_v = calc_logprob(mu_v, net_act.logstd, traj_actions_v)
# normalize advantages
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment