#!/usr/bin/env python3
import json
import shlex
import subprocess
import time
from pathlib import Path

# Input file: read line by line in main(); each non-empty line is parsed as JSON.
PROMPTS = Path('/tmp/zhangxuefeng_1000_prompts.jsonl')
# Output file: one JSON result record per processed prompt (truncated at start).
RESULTS = Path('/tmp/zhangxuefeng_1000_results.jsonl')
# Output file: running ok/failed/timeout counters, rewritten periodically.
SUMMARY = Path('/tmp/zhangxuefeng_1000_summary.json')
# Container name passed to `docker exec`.
CONTAINER = 'nanobot-zhangxuefeng_method'
# Value of the agent's `-c` option; the command runs via `docker exec`, so this
# path is resolved inside the container.
CONFIG = '/root/.nanobot/zhangxuefeng_method_config.json'
# Prefix of the `-s` session id; run_one() appends `_<idx>` zero-padded to 4 digits.
SESSION_PREFIX = 'cli:batch1000'
# Per-prompt limit, in seconds, handed to subprocess.run(timeout=...).
TIMEOUT = 120


def extract_answer(stdout: str) -> str:
    """Reduce raw agent stdout to the cleaned answer text.

    Steps, in order:

    * normalise CRLF line endings and trim the whole text;
    * if the ``🐈 nanobot`` banner is present, keep only what follows its
      first occurrence;
    * drop lines that start with ``Using config:`` (ignoring indentation);
    * strip trailing whitespace from each line and collapse runs of blank
      lines into a single blank line;
    * trim leading/trailing blank lines and surrounding whitespace.

    Args:
        stdout: Captured standard output of one agent invocation.

    Returns:
        The cleaned text, or an empty string when nothing remains.
    """
    body = stdout.replace('\r\n', '\n').strip()
    _, banner, after_banner = body.partition('🐈 nanobot')
    if banner:
        body = after_banner.strip()

    kept = []
    for raw in body.splitlines():
        current = raw.rstrip()
        if not current:
            # A blank is stored only directly after a non-blank line, so the
            # result never starts with a blank and never has two in a row.
            if kept and kept[-1]:
                kept.append('')
        elif not current.lstrip().startswith('Using config:'):
            kept.append(current)

    # At most one trailing blank can exist because runs were collapsed above.
    if kept and not kept[-1]:
        kept.pop()
    return '\n'.join(kept).strip()


def _to_text(stream) -> str:
    """Return captured process output as ``str``.

    ``subprocess.TimeoutExpired`` may carry ``None`` or raw ``bytes`` even when
    text mode was requested, so both are normalised here. Partial output can be
    cut in the middle of a multi-byte character; undecodable bytes are dropped.
    """
    if not stream:
        return ''
    if isinstance(stream, bytes):
        return stream.decode('utf-8', 'ignore')
    return stream


def run_one(idx: int, prompt: str) -> dict:
    """Send one prompt to the agent inside the container and record the outcome.

    The agent is started through ``docker exec <CONTAINER> sh -lc <command>``.
    Every dynamic value placed in the shell command is quoted with
    ``shlex.quote`` so the prompt reaches the agent verbatim: ``$``, backticks,
    quotes and newlines in the prompt are neither expanded nor executed by the
    shell.

    Args:
        idx: Prompt index; used to build the session id
            ``<SESSION_PREFIX>_<idx:04d>``.
        prompt: Text passed to the agent's ``-m`` option.

    Returns:
        A dict with the keys ``idx``, ``prompt``, ``session``, ``returncode``
        (``None`` on timeout), ``elapsed_sec``, ``timeout``, ``stdout_raw``,
        ``stderr_raw`` and ``answer_clean``.

    Raises:
        OSError: If the ``docker`` executable cannot be started (propagated
            from ``subprocess.run``).
    """
    session = f'{SESSION_PREFIX}_{idx:04d}'
    agent_command = ' '.join([
        'nanobot', 'agent',
        '-c', shlex.quote(CONFIG),
        '-s', shlex.quote(session),
        '-m', shlex.quote(prompt),
        '--no-markdown',
    ])
    cmd = ['docker', 'exec', CONTAINER, 'sh', '-lc', agent_command]

    # Monotonic clock: the measured duration is immune to wall-clock changes.
    start = time.monotonic()
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Decode explicitly as UTF-8 instead of relying on the host
            # locale; invalid bytes are replaced rather than raising.
            encoding='utf-8',
            errors='replace',
            timeout=TIMEOUT,
        )
    except subprocess.TimeoutExpired as exc:
        # NOTE(review): subprocess.run kills the local `docker exec` client on
        # timeout; presumably the agent inside the container may keep
        # running - confirm if that matters for later prompts.
        timed_out = True
        returncode = None
        raw_stdout, raw_stderr = exc.stdout, exc.stderr
    else:
        timed_out = False
        returncode = proc.returncode
        raw_stdout, raw_stderr = proc.stdout, proc.stderr
    elapsed = round(time.monotonic() - start, 3)

    stdout = _to_text(raw_stdout)
    stderr = _to_text(raw_stderr)
    return {
        'idx': idx,
        'prompt': prompt,
        'session': session,
        'returncode': returncode,
        'elapsed_sec': elapsed,
        'timeout': timed_out,
        'stdout_raw': stdout,
        'stderr_raw': stderr,
        'answer_clean': extract_answer(stdout),
    }


def main():
    """Run every prompt from PROMPTS and write results plus a summary.

    RESULTS is emptied first, then receives one JSON line per prompt (flushed
    after each write). Each input line must be a JSON object with ``idx`` and
    ``prompt``; ``category`` is optional and copied into the result. An outcome
    counts as a timeout, as ok (exit code 0 with a non-empty cleaned answer),
    or as failed. SUMMARY is rewritten after every 25 processed prompts and
    once more when the input is exhausted.
    """
    tally = {'total_processed': 0, 'ok': 0, 'failed': 0, 'timeouts': 0}

    def dump_summary():
        # Counters first, timestamp last, matching the summary's key order.
        payload = dict(tally, updated_at=time.strftime('%Y-%m-%d %H:%M:%S'))
        SUMMARY.write_text(
            json.dumps(payload, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )

    RESULTS.write_text('', encoding='utf-8')
    with PROMPTS.open('r', encoding='utf-8') as source, \
            RESULTS.open('a', encoding='utf-8') as sink:
        for raw in source:
            record = raw.strip()
            if not record:
                continue
            item = json.loads(record)
            tally['total_processed'] += 1

            result = run_one(item['idx'], item['prompt'])
            result['category'] = item.get('category')
            sink.write(json.dumps(result, ensure_ascii=False) + '\n')
            # Flush per record so results survive an interrupted run.
            sink.flush()

            if result['timeout']:
                bucket = 'timeouts'
            elif result['returncode'] == 0 and result['answer_clean']:
                bucket = 'ok'
            else:
                bucket = 'failed'
            tally[bucket] += 1

            if tally['total_processed'] % 25 == 0:
                dump_summary()
    dump_summary()

# Run the batch only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
