Merge pull request #29 from Ipsedo/develop
Develop
Ipsedo authored Oct 1, 2024
2 parents 9d33b58 + a3fbb8d commit 70ed081
Showing 21 changed files with 105 additions and 157 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -93,13 +93,13 @@ An internet connection is also required in order to download dependencies inside
Run training on creature muscles
```bash
$ cd /path/to/EvoMotion/build
$ evo_motion muscles actor_critic_liquid -p hidden_size=32 -p seed=1234 -p learning_rate=1e-3 -p batch_size=32 -p gamma=0.99 -p first_entropy_factor=1e-1 -p wanted_entropy_factor=1e-2 -p entropy_factor_steps=4096 -p unfolding_steps=6 --env_seed 1234 --cuda train ./out/muscle_a2c_liquid --episodes 512 --nb_saves 4096
$ evo_motion muscles actor_critic_liquid -p hidden_size=64 -p seed=1234 -p learning_rate=1e-2 -p batch_size=16 -p gamma=0.995 -p unfolding_steps=6 -p entropy_factor=1e-2 --env_seed 1234 --cuda train ./out/muscle_a2c_liquid --episodes 512 --nb_saves 4096
```
4. After the first save (here after 1024 episodes), you can now evaluate your trained agent.

Evaluate agent on creature muscles (here the first model save) with GLFW window of 1920 * 1024 pixels
```bash
$ evo_motion muscles actor_critic_liquid --env_seed 1234 -p hidden_size=1 -p seed=1234 -p learning_rate=1e-3 -p batch_size=32 -p gamma=0.99 -p first_entropy_factor=1e-1 -p wanted_entropy_factor=1e-2 -p entropy_factor_steps=4096 -p unfolding_steps=6 --cuda run ./out/muscles_a2c_liquid/save_0 -w 1920 -h 1024
$ evo_motion muscles actor_critic_liquid --env_seed 1234 -p hidden_size=64 -p seed=1234 -p learning_rate=1e-2 -p batch_size=16 -p gamma=0.995 -p unfolding_steps=6 -p entropy_factor=1e-2 --cuda run ./out/muscles_a2c_liquid/save_0 -w 1920 -h 1024
```

## References
10 changes: 5 additions & 5 deletions evo_motion_model/src/env/creature_env.cpp
@@ -22,8 +22,8 @@ MuscleEnv::MuscleEnv(const int seed)
skeleton_json_path("./resources/skeleton/spider_new.json"),
skeleton(skeleton_json_path, "spider", glm::mat4(1.f)),
muscular_system(skeleton, skeleton_json_path), initial_remaining_seconds(1.f),
max_episode_seconds(60.f), target_velocity(1e-1f), reset_frames(10), curr_step(0),
max_steps(static_cast<int>(max_episode_seconds / DELTA_T_MODEL)),
max_episode_seconds(30.f), target_velocity(5e-1), minimal_velocity(1e-1f), reset_frames(10),
curr_step(0), max_steps(static_cast<int>(max_episode_seconds / DELTA_T_MODEL)),
remaining_steps(static_cast<int>(initial_remaining_seconds / DELTA_T_MODEL)) {
base.get_body()->setFriction(0.5f);

@@ -79,8 +79,8 @@ step MuscleEnv::compute_step() {
const float lin_vel_z = root.get_body()->getLinearVelocity().z();
const float reward = lin_vel_z;

if (lin_vel_z < target_velocity) remaining_steps -= 1;
else remaining_steps += 1;
if (lin_vel_z < minimal_velocity) remaining_steps -= 1;
else if (lin_vel_z >= target_velocity) remaining_steps += 1;

const bool win = curr_step >= max_steps;
const bool fail = remaining_steps <= 0;
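The hunk above replaces the single velocity threshold with a dead band: falling below `minimal_velocity` drains the budget of remaining steps, reaching `target_velocity` refills it, and speeds in between leave it untouched. A minimal standalone sketch of that bookkeeping; the time step and sample velocities here are assumptions for illustration, not the values of `DELTA_T_MODEL` or the environment:

```cpp
#include <iostream>

int main() {
    const float delta_t = 1.f / 60.f;      // assumed physics step, not DELTA_T_MODEL
    const float minimal_velocity = 1e-1f;  // below this: lose budget
    const float target_velocity = 5e-1f;   // at or above this: regain budget
    int remaining_steps = static_cast<int>(1.f / delta_t);  // ~1 second of slack

    const float velocities[] = {0.05f, 0.3f, 0.6f, 0.02f};  // made-up forward speeds
    for (const float v : velocities) {
        if (v < minimal_velocity) remaining_steps -= 1;       // too slow: penalized
        else if (v >= target_velocity) remaining_steps += 1;  // fast enough: rewarded
        // otherwise: budget unchanged (the new dead band)
        std::cout << "v=" << v << " remaining=" << remaining_steps << '\n';
    }
    // In the environment, remaining_steps <= 0 ends the episode as a failure.
    return 0;
}
```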
@@ -95,7 +95,7 @@ void MuscleEnv::reset_engine() {
// reset model transform
glm::vec3 root_pos(1.f, 0.25f, 2.f);

float angle_limit = static_cast<float>(M_PI) / 4.f;
float angle_limit = static_cast<float>(M_PI) / 3.f;

float angle_yaw = rd_uni(rng) * angle_limit - angle_limit / 2.f;
float angle_roll = rd_uni(rng) * angle_limit - angle_limit / 2.f;
1 change: 1 addition & 0 deletions evo_motion_model/src/env/creature_env.h
@@ -50,6 +50,7 @@ class MuscleEnv final : public Environment {
float max_episode_seconds;

float target_velocity;
float minimal_velocity;

int reset_frames;
int curr_step;
38 changes: 0 additions & 38 deletions evo_motion_networks/include/evo_motion_networks/agent_builder.h

This file was deleted.

95 changes: 43 additions & 52 deletions evo_motion_networks/src/agents/actor_critic.cpp
@@ -137,19 +137,16 @@ critic_response CriticModule::forward(const torch::Tensor &state) {

ActorCritic::ActorCritic(
const int seed, const std::vector<int64_t> &state_space,
const std::vector<int64_t> &action_space, int hidden_size, int batch_size, float lr,
float gamma, float first_entropy_factor, float wanted_entropy_factor, long entropy_factor_steps)
const std::vector<int64_t> &action_space, int hidden_size, const int batch_size, float lr,
const float gamma, const float entropy_factor)
: actor(std::make_shared<ActorModule>(state_space, action_space, hidden_size)),
actor_optimizer(std::make_shared<torch::optim::Adam>(actor->parameters(), lr)),
critic(std::make_shared<CriticModule>(state_space, hidden_size)),
critic_optimizer(std::make_shared<torch::optim::Adam>(critic->parameters(), lr)),
gamma(gamma), first_entropy_factor(first_entropy_factor),
wanted_entropy_factor(wanted_entropy_factor), entropy_factor_steps(entropy_factor_steps),
curr_device(torch::kCPU), batch_size(batch_size), episodes_buffer(), episode_policy_loss(0.f),
episode_policy_entropy(0.f), episode_critic_loss(0.f), curr_step(0L), curr_train_step(0L) {
at::manual_seed(seed);
episodes_buffer.push_back({});
}
gamma(gamma), entropy_factor(entropy_factor), curr_device(torch::kCPU),
batch_size(batch_size), episodes_buffer({{}}),
episode_actor_loss(0.f), episode_critic_loss(0.f), curr_episode_step(0),
last_episode_steps(0), curr_train_step(0L) { at::manual_seed(seed); }
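With the entropy schedule removed, the constructor now takes a single fixed `entropy_factor`. A minimal construction sketch against the new signature; the dimensions and include path below are placeholders, not values from the repository:

```cpp
#include <vector>
#include "actor_critic.h"  // assumed include path

int main() {
    const std::vector<int64_t> state_space{32};   // placeholder state size
    const std::vector<int64_t> action_space{8};   // placeholder action size

    ActorCritic agent(
        /*seed=*/1234, state_space, action_space,
        /*hidden_size=*/64, /*batch_size=*/16, /*lr=*/1e-2f,
        /*gamma=*/0.99f, /*entropy_factor=*/1e-2f);
    return 0;
}
```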

torch::Tensor ActorCritic::act(const torch::Tensor state, const float reward) {
const auto [mu, sigma] = actor->forward(state);
@@ -163,6 +160,8 @@ torch::Tensor ActorCritic::act(const torch::Tensor state, const float reward) {
episodes_buffer.back().value_buffer.push_back(value);
episodes_buffer.back().actions_buffer.push_back(action);

curr_episode_step++;

return action;
}

@@ -173,7 +172,7 @@ void ActorCritic::train(
const auto gamma_factor =
torch::pow(
gamma, torch::arange(batched_rewards.size(1), at::TensorOptions().device(curr_device)))
.unsqueeze(0);
.unsqueeze(0);

auto returns = (batched_rewards * gamma_factor).flip({1}).cumsum(1).flip({1}) / gamma_factor;
returns = (returns - returns.mean()) / (returns.std() + 1e-8);
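The two lines above vectorize the discounted-return computation: multiplying by γ^t, taking a reversed cumulative sum, and dividing by γ^t again gives G_t = Σ_{k≥t} γ^(k−t) r_k, which is then mean/std-normalized. A standalone sketch checking that identity against a naive loop (toy rewards and γ chosen here just for the check, assuming a libtorch build):

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
    const float gamma = 0.99f;
    // One toy episode of rewards, shaped [batch=1, steps=4].
    const auto rewards = torch::tensor({{1.f, 0.f, 0.f, 2.f}});

    // Vectorized form used in train(): (r * gamma^t) reversed-cumsum, then / gamma^t.
    const auto gamma_factor =
        torch::pow(gamma, torch::arange(rewards.size(1))).unsqueeze(0);
    const auto returns =
        (rewards * gamma_factor).flip({1}).cumsum(1).flip({1}) / gamma_factor;

    // Naive backward loop for comparison: G_t = r_t + gamma * G_{t+1}.
    auto expected = torch::zeros_like(rewards);
    float acc = 0.f;
    for (int64_t t = rewards.size(1) - 1; t >= 0; --t) {
        acc = rewards[0][t].item<float>() + gamma * acc;
        expected[0][t] = acc;
    }

    std::cout << returns << '\n' << expected << '\n';  // the two should match
    return 0;
}
```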
@@ -182,11 +181,9 @@
truncated_normal_pdf(batched_actions.detach(), batched_mus, batched_sigmas, -1.f, 1.f);
const auto policy_loss = torch::log(prob) * (returns - batched_values).detach().unsqueeze(-1);
const auto policy_entropy = truncated_normal_entropy(batched_mus, batched_sigmas, -1.f, 1.f);
const auto entropy_factor = get_exponential_entropy_factor();
const auto actor_loss =
-torch::mean(torch::sum(policy_loss + entropy_factor * policy_entropy, -1));

const auto critic_loss = torch::smooth_l1_loss(batched_values, returns, at::Reduction::Mean);
const auto actor_loss = -torch::mean(
torch::sum(policy_loss + policy_entropy * entropy_factor, -1));
const auto critic_loss = torch::mse_loss(batched_values, returns, at::Reduction::Mean);

actor_optimizer->zero_grad();
actor_loss.backward();
@@ -196,17 +193,16 @@
critic_loss.backward();
critic_optimizer->step();

episode_policy_loss = -policy_loss.sum(-1).mean().cpu().item().toFloat();
episode_policy_entropy = -entropy_factor * policy_entropy.sum(-1).mean().cpu().item().toFloat();
episode_actor_loss = actor_loss.cpu().item().toFloat();
episode_critic_loss = critic_loss.cpu().item().toFloat();

curr_train_step++;
}
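In equation form, the two losses computed above are the standard advantage actor-critic objectives with a fixed entropy bonus β = `entropy_factor` and a detached advantage Â_t = G_t − V_φ(s_t):

```math
\mathcal{L}_{\text{actor}} = -\,\mathbb{E}\!\left[\log \pi_\theta(a_t \mid s_t)\,\hat{A}_t + \beta\,\mathcal{H}\!\big(\pi_\theta(\cdot \mid s_t)\big)\right],
\qquad
\mathcal{L}_{\text{critic}} = \mathbb{E}\!\left[\big(V_\phi(s_t) - G_t\big)^2\right]
```

Here π is the truncated normal policy and G_t the normalized discounted return; note the critic loss also switches from smooth L1 to plain MSE in this commit.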

float ActorCritic::get_exponential_entropy_factor() const {
/*float ActorCritic::get_exponential_entropy_factor() const {
const auto lambda = -std::log(wanted_entropy_factor) / static_cast<float>(entropy_factor_steps);
return first_entropy_factor * std::exp(-lambda * static_cast<float>(curr_train_step));
}
}*/
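For reference, the schedule that is now commented out decayed the entropy coefficient exponentially over training steps; written out from the removed code:

```math
\beta(t) = \beta_{\text{first}}\, e^{-\lambda t}, \qquad \lambda = -\frac{\ln \beta_{\text{wanted}}}{T_{\text{steps}}}
```

so the coefficient starts at `first_entropy_factor` and shrinks as `curr_train_step` grows; the new constructor replaces this with the single constant `entropy_factor`.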

void ActorCritic::done(const float reward) {
episodes_buffer.back().rewards_buffer.push_back(reward);
@@ -215,50 +211,43 @@
if (actor->is_training() && static_cast<int>(episodes_buffer.size()) == batch_size) {
int episode_max_step = 0;

std::vector<torch::Tensor> actions_per_episode;
std::vector<torch::Tensor> values_per_episode;
std::vector<torch::Tensor> mus_per_episode;
std::vector<torch::Tensor> sigmas_per_episode;
std::vector<torch::Tensor> rewards_per_episode;
std::vector<torch::Tensor> actions_per_episode, values_per_episode, mus_per_episode,
sigmas_per_episode, rewards_per_episode;

for (const auto &e: episodes_buffer)
episode_max_step =
std::max(static_cast<int>(e.actions_buffer.size()), episode_max_step);
for (const auto &[mu_buffer, sigma_buffer, value_buffer, rewards_buffer, actions_buffer]:
episodes_buffer)
episode_max_step = std::max(static_cast<int>(actions_buffer.size()), episode_max_step);

for (const auto &e: episodes_buffer) {
int pad = episode_max_step - static_cast<int>(e.actions_buffer.size());
for (const auto &[mu_buffer, sigma_buffer, value_buffer, rewards_buffer, actions_buffer]:
episodes_buffer) {
int pad = episode_max_step - static_cast<int>(actions_buffer.size());

actions_per_episode.push_back(
torch::pad(torch::stack(e.actions_buffer), {0, 0, 0, pad}));
actions_per_episode.push_back(torch::pad(torch::stack(actions_buffer), {0, 0, 0, pad}));

values_per_episode.push_back(torch::pad(torch::cat(e.value_buffer), {0, pad}));
values_per_episode.push_back(torch::pad(torch::cat(value_buffer), {0, pad}));

mus_per_episode.push_back(torch::pad(torch::stack(e.mu_buffer), {0, 0, 0, pad}));
mus_per_episode.push_back(torch::pad(torch::stack(mu_buffer), {0, 0, 0, pad}));
sigmas_per_episode.push_back(
torch::pad(torch::stack(e.sigma_buffer), {0, 0, 0, pad}, "constant", 1.f));
torch::pad(torch::stack(sigma_buffer), {0, 0, 0, pad}, "constant", 1.f));

rewards_per_episode.push_back(torch::pad(
torch::tensor(e.rewards_buffer, at::TensorOptions().device(curr_device)),
{0, pad}));
rewards_per_episode.push_back(
torch::pad(
torch::tensor(rewards_buffer, at::TensorOptions().device(curr_device)),
{0, pad}));
}

const torch::Tensor batched_actions = torch::stack(actions_per_episode);
const torch::Tensor batched_values = torch::stack(values_per_episode);
const torch::Tensor batched_mus = torch::stack(mus_per_episode);
const torch::Tensor batched_sigmas = torch::stack(sigmas_per_episode);
const torch::Tensor batched_rewards = torch::stack(rewards_per_episode);
train(
torch::stack(actions_per_episode), torch::stack(values_per_episode),
torch::stack(mus_per_episode), torch::stack(sigmas_per_episode),
torch::stack(rewards_per_episode));

train(batched_actions, batched_values, batched_mus, batched_sigmas, batched_rewards);

episodes_buffer.clear();

} else if (!actor->is_training()) {
episodes_buffer.clear();
}
} else if (!actor->is_training()) { episodes_buffer.clear(); }

episodes_buffer.push_back({});

curr_step++;
last_episode_steps = curr_episode_step;
curr_episode_step = 0;
}
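Episodes in a batch end at different steps, so each buffer is right-padded to the longest episode before `torch::stack`. The pad spec `{0, 0, 0, pad}` lists (left, right) pairs starting from the last dimension, i.e. no padding on the action dimension and `pad` zero rows appended on the step dimension. A small sketch of that shape arithmetic, assuming libtorch and toy episode lengths:

```cpp
#include <torch/torch.h>
#include <algorithm>
#include <iostream>

int main() {
    // Two toy "episodes" of actions with 3 and 5 steps, action dim 2.
    const auto short_ep = torch::ones({3, 2});
    const auto long_ep = torch::ones({5, 2});

    const int64_t max_steps = std::max(short_ep.size(0), long_ep.size(0));
    const int64_t pad = max_steps - short_ep.size(0);

    // {last-dim left, last-dim right, second-to-last left, second-to-last right}
    const auto padded = torch::pad(short_ep, {0, 0, 0, pad});  // -> [5, 2]

    const auto batch = torch::stack({padded, long_ep});  // -> [2, 5, 2]
    std::cout << batch.sizes() << '\n';
    return 0;
}
```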

void ActorCritic::save(const std::string &output_folder_path) {
@@ -335,9 +324,11 @@ std::map<std::string, float> ActorCritic::get_metrics() {
}

return {
{"policy_loss", episode_policy_loss}, {"policy_entropy", episode_policy_entropy},
{"critic_loss", episode_critic_loss}, {"entropy_factor", get_exponential_entropy_factor()},
{"actor_grad_mean", actor_grad}, {"critic_grad_mean", critic_grad}};
{"actor_loss", episode_actor_loss},
{"critic_loss", episode_critic_loss},
{"actor_grad_mean", actor_grad},
{"critic_grad_mean", critic_grad},
{"episode_steps", last_episode_steps}};
}

void ActorCritic::to(const torch::DeviceType device) {
16 changes: 6 additions & 10 deletions evo_motion_networks/src/agents/actor_critic.h
@@ -110,34 +110,30 @@ class ActorCritic : public Agent {

private:
float gamma;
float first_entropy_factor;
float wanted_entropy_factor;
long entropy_factor_steps;
float entropy_factor;

torch::DeviceType curr_device;

int batch_size;
std::vector<episode_buffer> episodes_buffer;

float episode_policy_loss;
float episode_policy_entropy;
float episode_actor_loss;
float episode_critic_loss;

long curr_step;
int curr_episode_step;
int last_episode_steps;

long curr_train_step;

void train(
const torch::Tensor &batched_actions, const torch::Tensor &batched_values,
const torch::Tensor &batched_mus, const torch::Tensor &batched_sigmas,
const torch::Tensor &batched_rewards);

float get_exponential_entropy_factor() const;

public:
ActorCritic(
int seed, const std::vector<int64_t> &state_space, const std::vector<int64_t> &action_space,
int hidden_size, int batch_size, float lr, float gamma, float first_entropy_factor,
float wanted_entropy_factor, long entropy_factor_steps);
int hidden_size, int batch_size, float lr, float gamma, float entropy_factor);

torch::Tensor act(torch::Tensor state, float reward) override;

31 changes: 15 additions & 16 deletions evo_motion_networks/src/agents/actor_critic_liquid.cpp
@@ -15,8 +15,8 @@ LiquidCellModule::LiquidCellModule(
this->neuron_number = neuron_number;
steps = unfolding_steps;

const float std_w = 1e-1;
const float std_b = 1e-1;
constexpr float std_w = 1e-1f;
constexpr float std_b = 1e-1f;

weight = register_module(
"weight",
@@ -31,22 +31,23 @@
a = register_parameter("a", torch::ones({1, neuron_number}));
tau = register_parameter("tau", torch::ones({1, neuron_number}));

torch::nn::init::xavier_normal_(weight->weight, std_w / static_cast<float>(unfolding_steps));
torch::nn::init::xavier_normal_(
recurrent_weight->weight, std_w / static_cast<float>(unfolding_steps));
torch::nn::init::normal_(bias, 0, std_b / static_cast<float>(unfolding_steps));
torch::nn::init::normal_(weight->weight, 0, std_w / static_cast<float>(unfolding_steps));
torch::nn::init::normal_(
recurrent_weight->weight, 0, std_w / static_cast<float>(unfolding_steps));
torch::nn::init::normal_(bias, 0, std_b);

reset_x_t();
}

void LiquidCellModule::reset_x_t() {
x_t = torch::mish(torch::randn(
{1, neuron_number}, torch::TensorOptions().device(recurrent_weight->weight.device())));
x_t = torch::silu(
torch::randn(
{1, neuron_number}, torch::TensorOptions().device(recurrent_weight->weight.device())));
}

torch::Tensor
LiquidCellModule::compute_step(const torch::Tensor &x_t_curr, const torch::Tensor &i_t) {
return torch::mish(weight->forward(i_t) + recurrent_weight->forward(x_t_curr) + bias);
return torch::silu(weight->forward(i_t) + recurrent_weight->forward(x_t_curr) + bias);
}
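`compute_step` now uses SiLU for the cell nonlinearity f(x, i) = silu(W·i + W_rec·x + b). The `forward()` body is collapsed in this hunk, but given the registered `a` and `tau` parameters and the `unfolding_steps` count it presumably iterates a fused liquid time-constant update of roughly the following form. This is the standard LTC step (Hasani et al.), stated here as an assumption rather than the confirmed implementation:

```math
x \leftarrow \frac{x + \frac{\Delta t}{\text{steps}}\, f(x, i) \odot a}{1 + \frac{\Delta t}{\text{steps}}\left(\frac{1}{\tau} + f(x, i)\right)}
```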

torch::Tensor LiquidCellModule::forward(const torch::Tensor &state) {
@@ -86,7 +87,7 @@ ActorCriticLiquidNetwork::ActorCriticLiquidNetwork(

sigma = register_module(
"sigma", torch::nn::Sequential(
torch::nn::Linear(hidden_size, action_space[0]), torch::nn::Softplus()));
torch::nn::Linear(hidden_size, action_space[0]), torch::nn::Softplus()));

critic = register_module("critic", torch::nn::Linear(hidden_size, 1));

@@ -123,7 +124,7 @@ ActorLiquidNetwork::ActorLiquidNetwork(

sigma = register_module(
"sigma", torch::nn::Sequential(
torch::nn::Linear(hidden_size, action_space[0]), torch::nn::Softplus()));
torch::nn::Linear(hidden_size, action_space[0]), torch::nn::Softplus()));

mu->apply(init_weights);
sigma->apply(init_weights);
@@ -167,12 +168,10 @@ critic_response CriticLiquidNetwork::forward(const torch::Tensor &state) {

ActorCriticLiquid::ActorCriticLiquid(
const int seed, const std::vector<int64_t> &state_space,
const std::vector<int64_t> &action_space, int hidden_size, int batch_size, float lr,
float gamma, float first_entropy_factor, float wanted_entropy_factor, long entropy_factor_steps,
int unfolding_steps)
const std::vector<int64_t> &action_space, int hidden_size, const int batch_size, float lr,
const float gamma, const float entropy_factor, int unfolding_steps)
: ActorCritic(
seed, state_space, action_space, hidden_size, batch_size, lr, gamma, first_entropy_factor,
wanted_entropy_factor, entropy_factor_steps) {
seed, state_space, action_space, hidden_size, batch_size, lr, gamma, entropy_factor) {

actor = std::make_shared<ActorLiquidNetwork>(
state_space, action_space, hidden_size, unfolding_steps);
4 changes: 2 additions & 2 deletions evo_motion_networks/src/agents/actor_critic_liquid.h
@@ -93,8 +93,8 @@ class ActorCriticLiquid final : public ActorCritic {
public:
ActorCriticLiquid(
int seed, const std::vector<int64_t> &state_space, const std::vector<int64_t> &action_space,
int hidden_size, int batch_size, float lr, float gamma, float first_entropy_factor,
float wanted_entropy_factor, long entropy_factor_steps, int unfolding_steps);
int hidden_size, int batch_size, float lr, float gamma, float entropy_factor,
int unfolding_steps);

void done(float reward) override;
};
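The liquid variant follows the same construction pattern as the base-class sketch above, with `unfolding_steps` appended. Below, the hyperparameters are the ones from the updated README command, while the state/action sizes and include path remain placeholders:

```cpp
#include <vector>
#include "actor_critic_liquid.h"  // assumed include path

int main() {
    // Placeholder dimensions; in evo_motion they come from the muscles environment.
    const std::vector<int64_t> state_space{32};
    const std::vector<int64_t> action_space{8};

    ActorCriticLiquid agent(
        /*seed=*/1234, state_space, action_space,
        /*hidden_size=*/64, /*batch_size=*/16, /*lr=*/1e-2f,
        /*gamma=*/0.995f, /*entropy_factor=*/1e-2f, /*unfolding_steps=*/6);
    return 0;
}
```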