Advanced Backtesting

This example demonstrates the full backtesting stack: L2 orderbook replay, probabilistic fill models, market impact, latency simulation, Markov regime detection, and walk-forward parameter optimization.

Regime-Adaptive Market Maker with L2 Simulation

import horizon as hz
from horizon.walkforward import walk_forward

# --- 1. Generate synthetic L2 book data ---

def generate_book_data(n_ticks=500, start_price=0.50, start_ts=1700000000.0):
    """Create synthetic tick and L2 orderbook snapshot data.

    The mid price follows a deterministic pseudo-random walk (an LCG-style
    hash of the tick index) clamped to [0.05, 0.95], so output is fully
    reproducible without touching the global random state.

    Args:
        n_ticks: Number of ticks / book snapshots to generate.
        start_price: Initial mid price (default 0.50, i.e. a 50c contract).
        start_ts: Unix timestamp of the first tick; subsequent ticks are
            spaced exactly 1 second apart.

    Returns:
        A ``(ticks, book_snaps)`` tuple. ``ticks`` is a list of
        ``{"timestamp", "price"}`` dicts; ``book_snaps`` is a parallel list
        of snapshots with three ``(price, size)`` levels per side, placed
        1-3 cents away from the mid with depth growing away from touch.
    """
    ticks = []
    book_snaps = []
    price = start_price

    for i in range(n_ticks):
        # Deterministic noise in [-0.005, 0.005): classic LCG constants
        # hashed on the index give a reproducible walk without seeding.
        noise = ((i * 1103515245 + 12345) % 1000 - 500) / 100000.0
        # Clamp so the price stays a valid (non-degenerate) probability.
        price = max(0.05, min(0.95, price + noise))

        ts = start_ts + i

        ticks.append({"timestamp": ts, "price": round(price, 4)})
        book_snaps.append({
            "timestamp": ts,
            "bids": [
                (round(price - 0.01, 4), 100),
                (round(price - 0.02, 4), 200),
                (round(price - 0.03, 4), 500),
            ],
            "asks": [
                (round(price + 0.01, 4), 100),
                (round(price + 0.02, 4), 200),
                (round(price + 0.03, 4), 500),
            ],
        })

    return ticks, book_snaps


ticks, book_snaps = generate_book_data(500)

# --- 2. Train Markov regime model ---

# Fit on the first 200 ticks only, leaving the remainder out-of-sample
# for the backtests below.
prices = [t["price"] for t in ticks[:200]]
returns = hz.prices_to_returns(prices)

# Two hidden states — presumably "calm" vs "volatile"; which is which is
# revealed by the learned emission variances printed below.
model = hz.MarkovRegimeModel(n_states=2)
model.fit(returns, max_iters=100)

# Inspect what the model learned
# emission_params() yields one (mean, variance) pair per state — TODO
# confirm the (mean, var) ordering against the horizon API docs.
for i, (mean, var) in enumerate(model.emission_params()):
    print(f"State {i}: mean={mean:.6f}, std={var**0.5:.6f}")

# --- 3. Define regime-adaptive strategy ---

def regime_quoter(ctx):
    """Quote around the feed price, widening the spread and shrinking
    size as the detected regime turns volatile.

    Reads the ``regime`` / ``regime_vol_state`` annotations that the
    upstream regime-detection stage writes into ``ctx.params``.
    """
    current_regime = ctx.params.get("regime", 0)
    vol_probability = ctx.params.get("regime_vol_state", 0.0)

    mid = ctx.feed.price
    # Spread scales linearly from 4c (calm) up to 10c (fully volatile).
    quote_spread = 0.04 + 0.06 * vol_probability
    # Quote smaller outside the calm regime (state 0).
    quote_size = 3.0 if current_regime != 0 else 10.0

    return hz.quotes(fair=mid, spread=quote_spread, size=quote_size)

# --- 4. Backtest with L2 book + probabilistic fills ---

# Backtest over all 500 ticks (the regime model only saw the first 200).
result = hz.backtest(
    name="regime-l2-backtest",
    markets=["test-market"],
    data=ticks,
    # Stages run in order: the regime detector annotates ctx.params,
    # then the quoter reads those annotations.
    pipeline=[
        hz.markov_regime(model=model),
        regime_quoter,
    ],
    book_data={"test-market": book_snaps},
    # Probabilistic fills: "lambda" presumably controls fill intensity and
    # "queue_frac" our assumed position in the book queue — TODO confirm
    # exact semantics against the horizon fill-model docs.
    fill_model="probabilistic",
    fill_model_params={"lambda": 1.0, "queue_frac": 0.5},
    impact_temporary_bps=3.0,       # transient per-trade price impact
    impact_permanent_fraction=0.2,  # share of impact that persists
    latency_ms=20.0,                # simulated order-placement latency
    initial_capital=1000.0,
    rng_seed=42,                    # reproducible stochastic fills
)

print(result.summary())
print(f"Trades executed: {len(result.trades)}")

Walk-Forward Optimization

Find optimal parameters with out-of-sample validation:
# --- 5. Walk-forward optimization ---

def pipeline_factory(params):
    """Build a [regime-detector, quoter] pipeline from a parameter dict.

    Expects ``params`` to contain "spread" and "size". The quoter widens
    its spread 1.5x whenever the detected regime is state 1.
    """
    base_spread, base_size = params["spread"], params["size"]

    def quoter(ctx):
        # Widen quotes in the volatile regime; neutral multiplier otherwise.
        widen = 1.5 if ctx.params.get("regime", 0) == 1 else 1.0
        return hz.quotes(
            fair=ctx.feed.price,
            spread=base_spread * widen,
            size=base_size,
        )

    return [hz.markov_regime(model=model), quoter]

# Walk-forward: optimize on a training window, evaluate out-of-sample on
# the following window, then roll forward. "expanding" presumably grows
# the training window each split, and "purge_gap" drops 10 time units
# between train and test to prevent leakage — confirm both against the
# horizon.walkforward docs.
wf_result = walk_forward(
    data=ticks,
    pipeline_factory=pipeline_factory,
    # 3 x 3 grid = 9 candidate parameter sets searched per window.
    param_grid={
        "spread": [0.02, 0.04, 0.06],
        "size": [5, 10, 20],
    },
    n_splits=4,
    train_ratio=0.6,
    expanding=True,
    objective="sharpe_ratio",
    purge_gap=10.0,
    book_data={"test-market": book_snaps},
    fill_model="probabilistic",
    fill_model_params={"lambda": 1.0, "queue_frac": 0.5},
    rng_seed=42,
)

# Print results per window
for i, (window, params) in enumerate(
    zip(wf_result.windows, wf_result.best_params_per_window)
):
    oos = wf_result.test_results[i]
    print(f"Window {i}: best={params}, OOS trades={len(oos.trades)}, "
          f"OOS sharpe={oos.metrics.sharpe_ratio:.3f}")

# Aggregate out-of-sample
# Stitches all OOS windows together — the honest estimate of live
# performance, untouched by in-sample parameter selection.
m = wf_result.aggregate_metrics
print(f"\nAggregate OOS: {m.total_return_pct:.2%} return, "
      f"Sharpe={m.sharpe_ratio:.3f}, {m.total_trades} trades")

Calibration Analysis

Evaluate prediction accuracy on resolved markets:
# Calibration utilities from the compiled core module.
# NOTE(review): log_loss is imported but never called — the value printed
# below is the cal.log_loss attribute, not this function.
from horizon._horizon import calibration_curve, log_loss, edge_decay

# Simulate predictions and outcomes
# In practice these come from resolved markets: the predicted probability
# at entry vs the market's 0/1 resolution.
predictions = [0.3, 0.7, 0.9, 0.1, 0.6, 0.8, 0.4, 0.5]
outcomes = [0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0]

# Calibration curve
# Brier score and log loss measure overall accuracy; ECE (expected
# calibration error) measures how far predicted probabilities drift from
# observed frequencies, bin by bin.
cal = calibration_curve(predictions, outcomes, n_bins=5)
print(f"Brier Score: {cal.brier_score:.4f}")
print(f"Log Loss:    {cal.log_loss:.4f}")
print(f"ECE:         {cal.ece:.4f}")

# Each bin appears to be (bin-center prediction, observed frequency,
# sample count) — verify against the horizon calibration docs.
for center, freq, count in cal.bins:
    if count > 0:
        print(f"  Predicted ~{center:.0%}: Actual {freq:.0%} (n={count})")

# Edge decay
# Measures how quickly the entry-time edge erodes as resolution nears.
entry_ts = [1000.0, 2000.0, 3000.0, 4000.0]
resolution_ts = [5000.0, 5000.0, 5000.0, 5000.0]

# Positional args: presumably (entry_prices, outcomes, entry_ts,
# resolution_ts) — confirm ordering against the horizon API docs.
decay = edge_decay(
    [0.45, 0.55, 0.40, 0.60],
    [1.0, 1.0, 0.0, 0.0],
    entry_ts,
    resolution_ts,
    n_buckets=5,
)
print(f"\nEdge half-life: {decay.half_life_hours:.1f} hours")

Comparing Fill Models

See how different fill models affect your backtest results:
# Run the identical strategy under each fill model and compare outcomes.
for model_name in ("deterministic", "probabilistic", "glft"):
    outcome = hz.backtest(
        data=ticks,
        pipeline=[regime_quoter],
        book_data={"test-market": book_snaps},
        fill_model=model_name,
        # Superset of parameters: each fill model picks out what it needs.
        fill_model_params={"lambda": 1.0, "queue_frac": 0.5, "intensity": 1.0, "kappa": 1.5},
        rng_seed=42,
    )
    stats = outcome.metrics
    print(f"{model_name:15s}: {len(outcome.trades):3d} trades, "
          f"return={stats.total_return_pct:+.2%}, sharpe={stats.sharpe_ratio:.3f}")
Deterministic fills are optimistic (100% fill rate). Probabilistic and GLFT models produce fewer fills but more realistic PnL estimates. Compare all three to understand the sensitivity of your strategy to fill assumptions.