#!/usr/bin/env python3
import json, sys
from pathlib import Path

def rank(x):
    """Return a comparison key scoring how usable record ``x`` is.

    The key is the tuple ``(good, not_timed_out, answer_length)``; larger
    compares as better. ``good`` is 1 only when the record has a falsy
    ``timeout``, ``returncode == 0`` and a non-blank ``answer_clean`` that
    does not contain ``[LLM_ERROR]``.
    """
    ans = x.get('answer_clean') or ''
    good = (not x.get('timeout')) and x.get('returncode') == 0 and ans.strip() and '[LLM_ERROR]' not in ans
    return (1 if good else 0, 0 if x.get('timeout') else 1, len(ans))


# The last two arguments are the output paths. With fewer than two arguments
# ``sys.argv[-2]`` would be the script's own path (or not exist at all), so
# refuse to run instead of overwriting this file with the merged output.
if len(sys.argv) < 3:
    sys.exit('usage: {} [PART ...] MERGED_OUT RETRY_OUT'.format(sys.argv[0]))

parts = [Path(p) for p in sys.argv[1:-2]]
out = Path(sys.argv[-2])
retry_out = Path(sys.argv[-1])

# idx -> best record seen so far across all part files.
rows = {}
for p in parts:
    if not p.exists():
        # Missing part files are skipped silently.
        continue
    # Iterate the file handle rather than using str.splitlines(): splitlines()
    # also breaks on U+0085/U+2028/U+2029, which can appear unescaped inside
    # JSON strings written with ensure_ascii=False and would split a record.
    with p.open(encoding='utf-8') as fh:
        for line in fh:
            if not line.strip():
                continue
            obj = json.loads(line)
            idx = obj['idx']
            prev = rows.get(idx)
            # Keep the better-ranked record (no timeout, no LLM_ERROR,
            # non-empty answer preferred); on a tie the later record wins.
            if prev is None or rank(obj) >= rank(prev):
                rows[idx] = obj

# Write the merged records as JSON Lines, ordered by ascending idx.
with out.open('w', encoding='utf-8') as merged_fh:
    merged_fh.writelines(
        json.dumps(rows[key], ensure_ascii=False) + '\n'
        for key in sorted(rows)
    )

# Build the list of indices (1..1000) that still need to be re-run: those with
# no merged record at all, and those whose record is unusable.
retry = []
for idx in range(1, 1001):
    record = rows.get(idx)
    if record is None:
        # Never seen: only the index is known.
        retry.append({'idx': idx})
    else:
        text = record.get('answer_clean') or ''
        # Usable means: no timeout, zero return code, a non-blank answer, and
        # no LLM error marker inside the answer.
        usable = (
            not record.get('timeout')
            and record.get('returncode') == 0
            and text.strip()
            and '[LLM_ERROR]' not in text
        )
        if not usable:
            retry.append({
                'idx': idx,
                'prompt': record.get('prompt'),
                'category': record.get('category'),
            })

# Persist the retry list as JSON Lines, one entry per line.
with retry_out.open('w', encoding='utf-8') as retry_fh:
    retry_fh.writelines(
        json.dumps(entry, ensure_ascii=False) + '\n' for entry in retry
    )

# Report how many records were merged and how many need a retry.
summary = {'merged': len(rows), 'retry_count': len(retry)}
print(json.dumps(summary, ensure_ascii=False))
