#!/usr/bin/env python3
import json
import os
import queue
import subprocess
import threading
import time
from collections import Counter, defaultdict
from pathlib import Path

# Runtime configuration; every value can be overridden via an environment
# variable of the same name.

# Input JSONL file: one JSON object per non-blank line. Each object's `idx` and
# `prompt` keys are read; `category` is optional.
PROMPTS = Path(os.environ.get('PROMPTS', '/tmp/zhangxuefeng_1000_prompts.jsonl'))
# Output JSONL file: truncated at startup, then one result row appended per prompt.
RESULTS = Path(os.environ.get('RESULTS', '/tmp/zhangxuefeng_results.jsonl'))
# Output JSON file holding the latest aggregate statistics snapshot.
SUMMARY = Path(os.environ.get('SUMMARY', '/tmp/zhangxuefeng_summary.json'))
# Path handed to `nanobot agent -c`.
CONFIG = os.environ.get('CONFIG', '/root/.nanobot/zhangxuefeng_method_config.json')
# Session names passed to `nanobot agent -s` are `<SESSION_PREFIX>_<idx, zero-padded to 4>`.
SESSION_PREFIX = os.environ.get('SESSION_PREFIX', 'cli:batch')
# Per-prompt subprocess timeout, in seconds.
TIMEOUT = int(os.environ.get('TIMEOUT', '120'))
# Number of worker threads started by main().
WORKERS = int(os.environ.get('WORKERS', '4'))

# Serializes the workers' updates to the in-memory rows list and to the
# RESULTS / SUMMARY files.
lock = threading.Lock()


def extract_answer(stdout: str) -> str:
    """Reduce raw CLI stdout to the cleaned answer text.

    Steps, in order:
      * normalise CRLF to LF and trim the whole text (``None`` counts as empty);
      * if the ``🐈 nanobot`` banner is present, keep only what follows its
        first occurrence;
      * drop lines starting with ``Using config:``;
      * right-strip every kept line and collapse runs of blank lines to one;
      * trim the joined result.
    """
    body = (stdout or '').replace('\r\n', '\n').strip()
    _, banner, after_banner = body.partition('🐈 nanobot')
    if banner:
        body = after_banner.strip()

    kept = []
    for raw in body.splitlines():
        content = raw.strip()
        if content.startswith('Using config:'):
            continue
        if content:
            kept.append(raw.rstrip())
        elif kept and kept[-1]:
            # Blank line: keep at most one in a row, and never as the first entry.
            kept.append('')

    # At most one trailing '' can remain; the final strip removes it together
    # with any leading indentation on the first kept line.
    return '\n'.join(kept).strip()


def _partial_output(stream) -> str:
    """Return output captured before a timeout as text.

    ``TimeoutExpired`` may carry ``bytes``, ``str`` or ``None``; bytes are
    decoded as UTF-8 and undecodable bytes are dropped, because the output can
    be cut off in the middle of a multi-byte character.
    """
    if isinstance(stream, bytes):
        return stream.decode('utf-8', 'ignore')
    return stream or ''


def run_one(item: dict) -> dict:
    """Run one prompt through ``nanobot agent`` and return its result row.

    Args:
        item: Prompt record. ``idx`` (formatted with ``:04d``) and ``prompt``
            are required; ``category`` is optional.

    Returns:
        A dict with keys ``idx``, ``category``, ``prompt``, ``session``,
        ``returncode``, ``elapsed_sec``, ``timeout``, ``stdout_raw``,
        ``stderr_raw`` and ``answer_clean``. ``returncode`` is ``None`` when the
        process timed out or could not be started; in the latter case
        ``stderr_raw`` holds the launch error text.

    Raises:
        KeyError: If ``item`` lacks ``idx`` or ``prompt``.
    """
    idx = item['idx']
    prompt = item['prompt']
    session = f'{SESSION_PREFIX}_{idx:04d}'
    cmd = ['nanobot', 'agent', '-c', CONFIG, '-s', session, '-m', prompt, '--no-markdown']

    returncode = None
    timed_out = False
    # Monotonic clock: the elapsed time must not be distorted by wall-clock
    # adjustments that happen while the subprocess is running.
    started = time.monotonic()
    try:
        # Decode explicitly as UTF-8 (matching the timeout path) instead of the
        # locale encoding; errors='replace' keeps a stray byte from raising
        # UnicodeDecodeError and losing the whole result.
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            timeout=TIMEOUT,
        )
    except subprocess.TimeoutExpired as exc:
        timed_out = True
        stdout = _partial_output(exc.stdout)
        stderr = _partial_output(exc.stderr)
    except OSError as exc:
        # The process could not be launched (e.g. executable not found or the
        # argument list is too long). Record it as a failed row rather than
        # letting the exception terminate the calling worker thread.
        stdout = ''
        stderr = f'{type(exc).__name__}: {exc}'
    else:
        returncode = proc.returncode
        stdout = proc.stdout or ''
        stderr = proc.stderr or ''
    elapsed = round(time.monotonic() - started, 3)

    return {
        'idx': idx,
        'category': item.get('category'),
        'prompt': prompt,
        'session': session,
        'returncode': returncode,
        'elapsed_sec': elapsed,
        'timeout': timed_out,
        'stdout_raw': stdout,
        'stderr_raw': stderr,
        'answer_clean': extract_answer(stdout),
    }


def stats_snapshot(done, rows):
    """Summarise result rows into overall and per-category counters.

    A row is a *timeout* when its ``timeout`` flag is set; otherwise it is
    *ok* when ``returncode`` is 0 and ``answer_clean`` is non-empty; anything
    else is *failed*. Rows without a category are grouped under ``'unknown'``.

    Args:
        done: Number of processed prompts; the overall ``failed`` figure is
            derived as ``done - ok - timeouts``.
        rows: Result rows as produced by ``run_one``.

    Returns:
        A dict with ``total_processed``, ``ok``, ``failed``, ``timeouts``,
        ``workers``, ``updated_at`` (local time, ``%Y-%m-%d %H:%M:%S``) and
        ``by_category`` (a ``defaultdict`` mapping category to a ``Counter``
        with ``total``/``ok``/``failed``/``timeouts``).
    """
    def _empty_bucket():
        return Counter(total=0, ok=0, failed=0, timeouts=0)

    per_category = defaultdict(_empty_bucket)
    ok_count = 0
    timeout_count = 0

    for row in rows:
        # Classify once and reuse the verdict for both levels of counting.
        if row['timeout']:
            outcome = 'timeouts'
            timeout_count += 1
        elif row['returncode'] == 0 and row['answer_clean']:
            outcome = 'ok'
            ok_count += 1
        else:
            outcome = 'failed'

        bucket = per_category[row.get('category') or 'unknown']
        bucket['total'] += 1
        bucket[outcome] += 1

    return {
        'total_processed': done,
        'ok': ok_count,
        'failed': done - ok_count - timeout_count,
        'timeouts': timeout_count,
        'workers': WORKERS,
        'updated_at': time.strftime('%Y-%m-%d %H:%M:%S'),
        'by_category': per_category,
    }


def main():
    """Process every prompt in PROMPTS on WORKERS threads and record the results.

    RESULTS is truncated first, then receives one JSON line per finished
    prompt, in completion order. SUMMARY is rewritten after every 20th result
    and once more at the end, after all workers have finished.
    """
    # Iterate the file line by line instead of using str.splitlines():
    # splitlines() also breaks on U+2028, U+2029 and U+0085, which may appear
    # unescaped inside a JSON string and would cut a record in half.
    with PROMPTS.open(encoding='utf-8') as src:
        items = [json.loads(line) for line in src if line.strip()]
    RESULTS.write_text('', encoding='utf-8')
    q = queue.Queue()
    for item in items:
        q.put(item)
    rows = []

    def write_summary():
        # by_category holds Counter objects; convert them to plain dicts
        # before dumping the snapshot.
        snap = stats_snapshot(len(rows), rows)
        snap['by_category'] = {k: dict(v) for k, v in snap['by_category'].items()}
        SUMMARY.write_text(json.dumps(snap, ensure_ascii=False, indent=2), encoding='utf-8')

    def worker():
        # Pull prompts until the queue is drained. The queue is fully populated
        # before any worker starts, so an empty queue means no work is left.
        while True:
            try:
                item = q.get_nowait()
            except queue.Empty:
                return
            result = run_one(item)
            # One lock guards the shared list and both output files so rows
            # and summaries are written by one thread at a time.
            with lock:
                rows.append(result)
                with RESULTS.open('a', encoding='utf-8') as out:
                    out.write(json.dumps(result, ensure_ascii=False) + '\n')
                if len(rows) % 20 == 0:
                    write_summary()
            q.task_done()

    threads = [threading.Thread(target=worker, daemon=True) for _ in range(WORKERS)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    rows.sort(key=lambda x: x['idx'])
    write_summary()

# Run the batch only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
