# rlte/evaluate.py
# Evaluation & plots as in Fig. 3/4: E[reward], std; histograms of LN vs. TWAP vs. SL.
# (DR stays optional/disabled, since no training is done with it here.)
from __future__ import annotations
import numpy as np
import matplotlib.pyplot as plt
from . import config as C
from .train_ln import train_ln
from .baselines import run_SL, run_TWAP


def eval_all(markets=("noise", "tactical", "strategic"), lots=(20, 60), device="cpu"):
    results = {}
    for mkt in markets:
        for M0 in lots:
            print(f"Training LN in market={mkt}, M0={M0}")
            train_ln(market=mkt, M0=M0, device=device)
            # After training: for metrics/histograms we would run sampled episodes
            # using the (final) policies. For simplicity we use the most recently
            # trained policy function indirectly via train_ln (or one would
            # checkpoint it). Here we only record the baselines (SL/TWAP) as a
            # reference; LN returns would come from training.
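            # A minimal sketch of collecting LN returns, assuming a hypothetical
            # variant of train_ln that also hands back the per-episode returns of
            # its final rollouts (`return_episode_returns` is NOT part of the
            # current API, only an illustration):
            #
            #   ln_returns = train_ln(market=mkt, M0=M0, device=device,
            #                         return_episode_returns=True)
            #   mu_LN, sd_LN = float(np.mean(ln_returns)), float(np.std(ln_returns))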
            mu_SL, sd_SL = run_SL(mkt, M0, episodes=C.EVAL_EPISODES)
            mu_TWAP, sd_TWAP = run_TWAP(mkt, M0, episodes=C.EVAL_EPISODES)
            results[(mkt, M0)] = {
                "SL": (mu_SL, sd_SL),
                "TWAP": (mu_TWAP, sd_TWAP),
                # "LN": in a production version, collect returns during training/rollouts
            }
            print(f"[{mkt} M={M0}] SL: mean={mu_SL:.3f} sd={sd_SL:.3f} | "
                  f"TWAP: mean={mu_TWAP:.3f} sd={sd_TWAP:.3f}")
    return results
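

# A minimal plotting sketch for the Fig. 3/4-style comparison mentioned in the
# header, assuming `results` has the shape produced by eval_all() above. The raw
# per-episode returns needed for true histograms are not collected here, so this
# only draws mean +/- std bars per (market, lot size); `plot_summary` is an
# illustrative helper, not part of the original module.
def plot_summary(results):
    labels = [f"{mkt}\nM0={M0}" for (mkt, M0) in results]
    x = np.arange(len(labels))
    width = 0.35
    fig, ax = plt.subplots(figsize=(8, 4))
    for i, strat in enumerate(("SL", "TWAP")):
        means = [results[key][strat][0] for key in results]
        stds = [results[key][strat][1] for key in results]
        # Offset the bars so SL and TWAP sit side by side per market/lot pair.
        ax.bar(x + (i - 0.5) * width, means, width, yerr=stds, capsize=3, label=strat)
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.set_ylabel("E[reward]")
    ax.legend()
    fig.tight_layout()
    plt.show()

# Usage (hypothetical): plot_summary(eval_all())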


if __name__ == "__main__":
    eval_all()