# Quickstart Guide

## Training

```python
from ddql_optimal_execution import DDQL, MarketEnvironnement, Trainer

# Create the market environment
env = MarketEnvironnement(initial_inventory=500, multi_episodes=True, QV=True, Volume=True, data_path='../data/train')

# Create the DDQL agent
agent = DDQL(state_size=env.state_size, initial_budget=env.initial_inventory, horizon=env.horizon)
```
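Under the hood, `DDQL` implements double deep Q-learning: an online network selects the greedy next action while a separate, periodically synced target network evaluates it, which curbs the overestimation bias of vanilla Q-learning. A generic sketch of the target computation, assuming PyTorch-style batch tensors (this is not the library's internal code):

```python
import torch

def double_dqn_target(reward, next_state, done, online_net, target_net, gamma=0.99):
    """Double DQN target for a batch of transitions (generic sketch).

    Assumes reward/done are float tensors of shape (batch,) and the
    networks map (batch, state_dim) -> (batch, n_actions).
    """
    with torch.no_grad():
        # Online network picks the action...
        best_action = online_net(next_state).argmax(dim=1, keepdim=True)
        # ...the target network evaluates it
        next_q = target_net(next_state).gather(1, best_action).squeeze(1)
    # No bootstrapping past terminal states
    return reward + gamma * (1.0 - done) * next_q
```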

```python
# Create a trainer; experience is stored in a replay buffer of capacity 10000
trainer = Trainer(agent, env, capacity=10000)

# Fill the replay buffer with initial transitions before learning starts
trainer.fill_exp_replay(max_steps=10000)

# Short pre-training phase, then the main training run
trainer.pretrain(max_steps=100, batch_size=128)

trainer.train(max_steps=1000, batch_size=128)
```
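For intuition, the experience replay that `Trainer` maintains is conceptually a fixed-capacity buffer of past transitions, sampled uniformly at random so that training batches are decorrelated in time. A minimal standalone sketch, not this library's implementation (the `(state, action, reward, next_state, done)` layout is an assumption):

```python
import random
from collections import deque

class ReplayBuffer:
    """Fixed-capacity FIFO of transitions (conceptual sketch)."""

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # oldest transitions are evicted first

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform sampling breaks temporal correlation between transitions
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)
```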

## Testing vs TWAP

This continues from the Training section: the environment class and the trained agent (`trainer.agent`) are reused, so only the TWAP benchmark is created here.

```python
import numpy as np

from ddql_optimal_execution import TWAP

# Create a test environment (same settings as training, but on held-out data)
test_env = MarketEnvironnement(initial_inventory=500, multi_episodes=True, QV=True, Volume=True, data_path='../data/test')

# Create the TWAP benchmark agent
twap = TWAP(initial_inventory=test_env.initial_inventory, horizon=test_env.horizon)
```
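For reference, a TWAP policy simply splits the parent order into equal slices, one per period, independent of market state; a minimal sketch (not the library's `TWAP` class):

```python
class SimpleTWAP:
    """Trades initial_inventory / horizon in every period (conceptual sketch)."""

    def __init__(self, initial_inventory, horizon):
        self.slice_size = initial_inventory / horizon

    def __call__(self, state):
        # The action ignores the observed state entirely
        return self.slice_size
```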

```python
pnl_twap = []
pnl_ddql = []

# Evaluate on up to 100 episodes, sampled with replacement
n_episodes = min(len(test_env.historical_data_series), 100)
random_ep = np.random.choice(np.arange(n_episodes), size=n_episodes, replace=True)

for ep in random_ep:
    test_env.swap_episode(ep)

    # Roll out the TWAP benchmark on this episode
    while not test_env.done:
        current_state = test_env.state.copy()
        action = twap(current_state)
        _ = test_env.step(action)

    # Episode PnL plus the mark-to-market value of any leftover inventory,
    # net of the quadratic penalty on the unexecuted fraction
    final_value = (
        test_env.state['Price'] * test_env.state['inventory']
        - test_env.quadratic_penalty_coefficient
        * (test_env.state['inventory'] / test_env.initial_inventory) ** 2
        / test_env.horizon
    )
    pnl_twap.append(test_env.pnl_for_episode + [final_value])

    # Reset the episode and replay it with the trained DDQL agent
    test_env.reset()

    while not test_env.done:
        current_state = test_env.state.copy()
        action = trainer.agent(current_state)
        _ = test_env.step(action)

    final_value = (
        test_env.state['Price'] * test_env.state['inventory']
        - test_env.quadratic_penalty_coefficient
        * (test_env.state['inventory'] / test_env.initial_inventory) ** 2
        / test_env.horizon
    )
    pnl_ddql.append(test_env.pnl_for_episode + [final_value])
```
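To compare the two policies, aggregate the per-step PnL lists into per-episode totals, for example (assuming the per-step entries are scalars; this summary is illustrative, not part of the library):

```python
# Total PnL per episode for each policy
total_twap = np.array([np.sum(ep_pnl) for ep_pnl in pnl_twap])
total_ddql = np.array([np.sum(ep_pnl) for ep_pnl in pnl_ddql])

print(f"TWAP mean PnL: {total_twap.mean():.2f} (std {total_twap.std():.2f})")
print(f"DDQL mean PnL: {total_ddql.mean():.2f} (std {total_ddql.std():.2f})")
```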