#!/usr/bin/env python3
"""
PRIORS No.1 — The Pattern That Wasn't
A runnable, dependency-free demonstration of overfitting / selection bias.

What it shows: if you test enough random "strategies" against pure noise and
keep the one with the best in-sample Sharpe, it will look brilliant on the data
it was chosen from — and collapse on data it has never seen.

This is SYNTHETIC data. It is an illustration of a statistical trap, NOT a
strategy, a backtest of any real approach, or any representation of performance.

Run:  python3 priors-01-overfitting.py
Out:  priors-01-results.md  +  02-insample-outofsample-chart.svg
Pure standard library. Deterministic (fixed seed) so anyone gets the same result.
"""
import random
import math
import statistics

# Fixed RNG seed; main() passes it to random.seed() so every run is identical.
SEED = 254
T = 504           # ~2 trading years
SPLIT = 252       # first year = in-sample, second = out-of-sample
N = 1000          # number of random strategies we "search" over
# Annualisation factor applied by sharpe(): square root of 252 periods per year.
ANN = math.sqrt(252)

def sharpe(returns):
    """Return the annualised Sharpe ratio of a series of per-period returns.

    The ratio is the mean return divided by the population standard
    deviation, scaled by the module-level ANN factor. No risk-free rate is
    subtracted.

    Args:
        returns: Sequence of per-period returns.

    Returns:
        The annualised ratio as a float, or 0.0 when the series has fewer
        than two observations or no dispersion at all (the ratio would be
        undefined in both cases).
    """
    # A single observation (or none) carries no information about volatility.
    if len(returns) < 2:
        return 0.0

    volatility = statistics.pstdev(returns)
    # Guard the division: a perfectly flat series has zero volatility.
    return 0.0 if volatility == 0 else (statistics.mean(returns) / volatility) * ANN

def main():
    """Run the overfitting demonstration and write its two output files.

    Steps:
      1. Seed the RNG with SEED and draw T days of zero-mean Gaussian
         "market" returns (standard deviation 0.01).
      2. Build N strategies, each a random +1/-1 position per day, and score
         each one by its in-sample Sharpe (the first SPLIT days).
      3. Pick the strategy with the highest in-sample Sharpe and evaluate it
         on the remaining T - SPLIT out-of-sample days.
      4. Write the Markdown summary to ``priors-01-results.md``, render the
         chart via ``write_svg``, and print the summary to stdout.

    Both files are created in the current working directory.
    """
    random.seed(SEED)

    # The market is pure noise: zero expected return. There is NO edge to find.
    market = [random.gauss(0, 0.01) for _ in range(T)]

    # Each "strategy" is a random sequence of long/short bets, fixed in advance.
    # Positions are drawn one whole strategy at a time, in this exact order, so
    # the seeded RNG stream (and therefore every reported number) is stable.
    strategies = []
    for _ in range(N):
        pos = [random.choice((-1, 1)) for _ in range(T)]
        ret = [p * m for p, m in zip(pos, market)]
        strategies.append((sharpe(ret[:SPLIT]), ret))

    # Keep the single best strategy BY IN-SAMPLE SHARPE — i.e. we "discover" it.
    # The sort is stable, so ties keep their generation order.
    strategies.sort(key=lambda s: s[0], reverse=True)
    best_is_sharpe, best_ret = strategies[0]
    best_oos_sharpe = sharpe(best_ret[SPLIT:])

    # Context: the whole population, and the top-10 winners out-of-sample.
    all_is = [s for s, _ in strategies]
    top10_oos = sorted(sharpe(r[SPLIT:]) for _, r in strategies[:10])
    median_is = statistics.median(all_is)
    top10_lo, top10_hi = top10_oos[0], top10_oos[-1]

    # Equity curve of the "winner" across the full period (cumulative return).
    eq, run = [], 0.0
    for r in best_ret:
        run += r
        eq.append(run)

    # ---- honest write-up -------------------------------------------------
    md = f"""# Priors No.1 — results (reproducible)

Synthetic illustration of overfitting. Not a strategy or any performance claim.

- Strategies searched: **{N}** (each a random sequence of long/short bets)
- Market: **pure noise**, zero expected return — there is no real edge to find
- Period: {T} days, split {SPLIT} in-sample / {T-SPLIT} out-of-sample
- Seed: {SEED} (deterministic)

| Metric | Sharpe |
|---|---|
| Best strategy — **in-sample** (the number that would sell it) | **{best_is_sharpe:.2f}** |
| Same strategy — **out-of-sample** (what was actually left) | **{best_oos_sharpe:.2f}** |
| Median strategy — in-sample | {median_is:.2f} |
| Top-10 in-sample winners — out-of-sample (they scatter) | {top10_lo:.2f} to {top10_hi:.2f} |

The best in-sample Sharpe of ~{best_is_sharpe:.1f} was produced by luck alone, from
searching {N} random bets against noise. Out-of-sample it is ~{best_oos_sharpe:.1f}.
The honest number was never the {best_is_sharpe:.1f}. It was the {best_is_sharpe:.1f}
*penalized for how many times we went looking.*
"""
    # The report contains non-ASCII characters (em dashes); pin the encoding so
    # the file is the same bytes on every platform instead of following the
    # locale default.
    with open("priors-01-results.md", "w", encoding="utf-8") as f:
        f.write(md)

    # ---- chart (SVG built from the real numbers) -------------------------
    write_svg(eq, best_is_sharpe, best_oos_sharpe)

    print(md)
    print("wrote priors-01-results.md and 02-insample-outofsample-chart.svg")

def write_svg(eq, is_sharpe, oos_sharpe):
    """Render the winner's equity curve to ``02-insample-outofsample-chart.svg``.

    The curve is split at the in-sample / out-of-sample boundary: the
    in-sample stretch is stroked in #F4C84B, the out-of-sample stretch in
    #E64B4B, with a dashed vertical divider between them. The file is written
    as UTF-8 in the current working directory.

    Args:
        eq: Cumulative return after each day, one value per day
            (``eq[k]`` includes the returns of days ``0..k``).
        is_sharpe: In-sample Sharpe ratio shown in the in-sample caption.
        oos_sharpe: Out-of-sample Sharpe ratio shown in the out-of-sample
            caption.

    Raises:
        ValueError: If ``eq`` is empty.
    """
    if not eq:
        raise ValueError("write_svg needs at least one equity point")

    X0, X1, Y0, Y1 = 200, 1460, 290, 720          # plot box
    lo, hi = min(eq), max(eq)
    # 12% headroom above and below; fall back to 1.0 for a perfectly flat curve
    # so the vertical scale never collapses to zero height.
    pad = (hi - lo) * 0.12 or 1.0
    lo, hi = lo - pad, hi + pad

    # Number of horizontal steps; at least 1 so a single-point curve does not
    # divide by zero.
    steps = max(len(eq) - 1, 1)

    def px(i):
        """Map a day index to an x pixel coordinate inside the plot box."""
        return X0 + (X1 - X0) * (i / steps)

    def py(v):
        """Map a cumulative return to a y pixel coordinate (SVG y grows down)."""
        return Y1 - (Y1 - Y0) * ((v - lo) / (hi - lo))

    # eq[k] already contains day k's return, so the last point that depends
    # only on in-sample data is index SPLIT - 1. Both polylines share that
    # point so the curve is continuous and the first out-of-sample return is
    # drawn in the out-of-sample colour.
    boundary = min(max(SPLIT - 1, 0), len(eq) - 1)
    split_x = px(boundary)
    is_pts = " ".join(f"{px(i):.1f},{py(v):.1f}" for i, v in enumerate(eq[:boundary + 1]))
    oos_pts = " ".join(f"{px(i):.1f},{py(v):.1f}" for i, v in enumerate(eq) if i >= boundary)
    svg = f'''<svg width="1600" height="900" viewBox="0 0 1600 900" xmlns="http://www.w3.org/2000/svg" font-family="'Helvetica Neue','Arial',sans-serif">
  <defs>
    <radialGradient id="bg" cx="80%" cy="6%" r="95%"><stop offset="0%" stop-color="#101713"/><stop offset="55%" stop-color="#0a0d0b"/><stop offset="100%" stop-color="#070908"/></radialGradient>
    <pattern id="g" width="50" height="50" patternUnits="userSpaceOnUse"><path d="M50 0H0V50" fill="none" stroke="#ffffff" stroke-opacity="0.02"/></pattern>
  </defs>
  <rect width="1600" height="900" fill="url(#bg)"/><rect width="1600" height="900" fill="url(#g)"/>
  <text x="96" y="92" fill="#F4C84B" font-family="Menlo,monospace" font-size="18" letter-spacing="4">PRIORS No.1 · ANTHERA</text>
  <text x="94" y="150" fill="#F4F4ED" font-size="50" font-weight="600" letter-spacing="-0.5">The pattern that wasn't</text>
  <text x="96" y="192" fill="#9AA39B" font-size="23">{N} random strategies vs pure noise. This is the best one — by luck.</text>

  <line x1="{X0}" y1="{Y1}" x2="{X1}" y2="{Y1}" stroke="#ffffff" stroke-opacity="0.10"/>
  <line x1="{X0}" y1="{Y0}" x2="{X0}" y2="{Y1}" stroke="#ffffff" stroke-opacity="0.10"/>
  <line x1="{split_x:.1f}" y1="{Y0}" x2="{split_x:.1f}" y2="{Y1}" stroke="#9AA39B" stroke-opacity="0.45" stroke-width="1.5" stroke-dasharray="4 6"/>
  <text x="{split_x:.1f}" y="{Y0-14}" fill="#9AA39B" font-family="Menlo,monospace" font-size="15" text-anchor="middle" letter-spacing="2">THE PRESENT · UNSEEN DATA →</text>

  <polyline points="{is_pts}" fill="none" stroke="#F4C84B" stroke-width="3.5"/>
  <polyline points="{oos_pts}" fill="none" stroke="#E64B4B" stroke-width="3.5"/>

  <text x="{(X0+split_x)/2:.0f}" y="{Y1-18}" fill="#6f7a71" font-family="Menlo,monospace" font-size="15" text-anchor="middle" letter-spacing="2">IN-SAMPLE</text>
  <text x="{(split_x+X1)/2:.0f}" y="{Y1-18}" fill="#6f7a71" font-family="Menlo,monospace" font-size="15" text-anchor="middle" letter-spacing="2">OUT-OF-SAMPLE</text>
  <text x="{X0+40}" y="{Y0+50}" fill="#F4C84B" font-size="22">in-sample Sharpe {is_sharpe:.1f} — looks brilliant</text>
  <text x="{split_x+40:.0f}" y="{Y0+50}" fill="#E64B4B" font-size="22">out-of-sample Sharpe {oos_sharpe:.1f} — nothing was there</text>

  <line x1="96" y1="826" x2="1504" y2="826" stroke="#ffffff" stroke-opacity="0.08"/>
  <text x="96" y="862" fill="#F4F4ED" font-size="21" font-weight="700" letter-spacing="3">ANTHERA<tspan fill="#6f7a71" font-weight="400"> CAPITAL</tspan></text>
  <text x="1504" y="862" text-anchor="end" fill="#6f7a71" font-family="Menlo,monospace" font-size="14" letter-spacing="1">synthetic data · illustration of overfitting · not a strategy or performance</text>
</svg>
'''
    # The markup contains non-ASCII characters ("·", "—", "→"). An SVG without
    # an XML declaration is parsed as UTF-8, so write UTF-8 explicitly rather
    # than relying on the locale default (which cannot encode "→" on cp1252).
    with open("02-insample-outofsample-chart.svg", "w", encoding="utf-8") as f:
        f.write(svg)

# Run the demonstration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
