Advanced Backtesting
This example demonstrates the full backtesting stack: L2 orderbook replay, probabilistic fill models, market impact, latency simulation, Markov regime detection, and walk-forward parameter optimization.

Regime-Adaptive Market Maker with L2 Simulation
Copy
import horizon as hz
from horizon.walkforward import walk_forward
# --- 1. Generate synthetic L2 book data ---
def generate_book_data(n_ticks=500):
    """Create synthetic orderbook snapshots.

    Returns a pair ``(ticks, book_snaps)``: one trade-tick dict and one
    three-level L2 snapshot dict per step, sharing the same timestamp.
    """
    ticks, book_snaps = [], []
    mid = 0.50
    # Fixed (price_offset, quantity) ladder used for both sides of the book.
    ladder = ((0.01, 100), (0.02, 200), (0.03, 500))
    for step in range(n_ticks):
        # Deterministic pseudo-random step derived from the index (LCG-style
        # constants), clamped so the mid stays inside [0.05, 0.95].
        delta = ((step * 1103515245 + 12345) % 1000 - 500) / 100000.0
        mid = max(0.05, min(0.95, mid + delta))
        stamp = 1700000000.0 + step
        ticks.append({"timestamp": stamp, "price": round(mid, 4)})
        book_snaps.append({
            "timestamp": stamp,
            "bids": [(round(mid - off, 4), qty) for off, qty in ladder],
            "asks": [(round(mid + off, 4), qty) for off, qty in ladder],
        })
    return ticks, book_snaps
ticks, book_snaps = generate_book_data(500)

# --- 2. Train Markov regime model ---
# Fit on the first 200 ticks only.
prices = [tick["price"] for tick in ticks[:200]]
returns = hz.prices_to_returns(prices)
model = hz.MarkovRegimeModel(n_states=2)
model.fit(returns, max_iters=100)

# Inspect what the model learned: per-state emission mean and variance.
for idx, (mu, var) in enumerate(model.emission_params()):
    print(f"State {idx}: mean={mu:.6f}, std={var**0.5:.6f}")
# --- 3. Define regime-adaptive strategy ---
def regime_quoter(ctx):
    """Quote around the feed price, widening and shrinking with the regime."""
    state = ctx.params.get("regime", 0)
    p_vol = ctx.params.get("regime_vol_state", 0.0)
    # 4-10 cents depending on regime
    quoted_spread = 0.04 + 0.06 * p_vol
    # Reduce size in volatile regime
    quote_size = 10.0 if state == 0 else 3.0
    return hz.quotes(fair=ctx.feed.price, spread=quoted_spread, size=quote_size)
# --- 4. Backtest with L2 book + probabilistic fills ---
# Execution-realism settings grouped together: probabilistic fills against the
# replayed L2 book, temporary/permanent impact, and simulated latency.
execution_kwargs = {
    "book_data": {"test-market": book_snaps},
    "fill_model": "probabilistic",
    "fill_model_params": {"lambda": 1.0, "queue_frac": 0.5},
    "impact_temporary_bps": 3.0,
    "impact_permanent_fraction": 0.2,
    "latency_ms": 20.0,
}
result = hz.backtest(
    name="regime-l2-backtest",
    markets=["test-market"],
    data=ticks,
    # Regime detector runs before the quoter so ctx.params carries the regime.
    pipeline=[hz.markov_regime(model=model), regime_quoter],
    initial_capital=1000.0,
    rng_seed=42,
    **execution_kwargs,
)
print(result.summary())
print(f"Trades executed: {len(result.trades)}")
Walk-Forward Optimization
Find optimal parameters with out-of-sample validation:

Copy
# --- 5. Walk-forward optimization ---
def pipeline_factory(params):
    """Build a [regime-detector, quoter] pipeline from one parameter dict."""
    base_spread = params["spread"]
    base_size = params["size"]

    def quoter(ctx):
        # Widen the quoted spread by 50% when the detected regime is state 1.
        widen = 1.5 if ctx.params.get("regime", 0) == 1 else 1.0
        return hz.quotes(
            fair=ctx.feed.price,
            spread=base_spread * widen,
            size=base_size,
        )

    return [hz.markov_regime(model=model), quoter]
# --- 5. Walk-forward optimization ---
wf_result = walk_forward(
    data=ticks,
    pipeline_factory=pipeline_factory,
    # 3x3 grid: every (spread, size) combination is evaluated per window.
    param_grid={
        "spread": [0.02, 0.04, 0.06],
        "size": [5, 10, 20],
    },
    n_splits=4,
    train_ratio=0.6,
    expanding=True,
    objective="sharpe_ratio",
    purge_gap=10.0,  # presumably a train/test gap to limit leakage — confirm units in walk_forward docs
    book_data={"test-market": book_snaps},
    fill_model="probabilistic",
    fill_model_params={"lambda": 1.0, "queue_frac": 0.5},
    rng_seed=42,
)

# Print results per window.
# NOTE(review): the original also zipped in wf_result.windows but never used
# the window value; iterating best_params_per_window directly drops the unused
# variable and avoids silent zip truncation if the two lists ever differ.
for i, params in enumerate(wf_result.best_params_per_window):
    oos = wf_result.test_results[i]
    print(f"Window {i}: best={params}, OOS trades={len(oos.trades)}, "
          f"OOS sharpe={oos.metrics.sharpe_ratio:.3f}")

# Aggregate out-of-sample
m = wf_result.aggregate_metrics
print(f"\nAggregate OOS: {m.total_return_pct:.2%} return, "
      f"Sharpe={m.sharpe_ratio:.3f}, {m.total_trades} trades")
Calibration Analysis
Evaluate prediction accuracy on resolved markets:

Copy
from horizon._horizon import calibration_curve, log_loss, edge_decay
# Simulate predictions and outcomes
predictions = [0.3, 0.7, 0.9, 0.1, 0.6, 0.8, 0.4, 0.5]
outcomes = [0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0]

# Calibration curve
cal = calibration_curve(predictions, outcomes, n_bins=5)
for label, value in (
    ("Brier Score", cal.brier_score),
    ("Log Loss", cal.log_loss),
    ("ECE", cal.ece),
):
    print(f"{label}: {value:.4f}")
for center, freq, count in cal.bins:
    if count <= 0:
        continue  # skip empty bins
    print(f" Predicted ~{center:.0%}: Actual {freq:.0%} (n={count})")

# Edge decay: entry prices, resolved outcomes, and the entry/resolution times.
entry_prices = [0.45, 0.55, 0.40, 0.60]
resolved = [1.0, 1.0, 0.0, 0.0]
entry_ts = [1000.0, 2000.0, 3000.0, 4000.0]
resolution_ts = [5000.0] * 4
decay = edge_decay(entry_prices, resolved, entry_ts, resolution_ts, n_buckets=5)
print(f"\nEdge half-life: {decay.half_life_hours:.1f} hours")
Comparing Fill Models
See how different fill models affect your backtest results:

Copy
for fm in ("deterministic", "probabilistic", "glft"):
    # One params dict serves all three models; lambda/queue_frac vs
    # intensity/kappa are presumably consumed by different models — confirm
    # against the fill-model documentation.
    r = hz.backtest(
        data=ticks,
        pipeline=[regime_quoter],
        book_data={"test-market": book_snaps},
        fill_model=fm,
        fill_model_params={"lambda": 1.0, "queue_frac": 0.5,
                           "intensity": 1.0, "kappa": 1.5},
        rng_seed=42,
    )
    m = r.metrics
    print(f"{fm:15s}: {len(r.trades):3d} trades, "
          f"return={m.total_return_pct:+.2%}, sharpe={m.sharpe_ratio:.3f}")
Deterministic fills are optimistic (100% fill rate). Probabilistic and GLFT models produce fewer fills but more realistic PnL estimates. Compare all three to understand the sensitivity of your strategy to fill assumptions.