61 changes: 34 additions & 27 deletions src/lib/daily.ts
@@ -16,6 +16,37 @@ export interface DailyPaper {
 }
 
 export const dailyPapers: DailyPaper[] = [
+  {
+    date: "2026-04-17",
+    title: "From P(y|x) to P(y): Investigating Reinforcement Learning in Pre-train Space",
+    titleZh: "从P(y|x)到P(y):在预训练空间探索强化学习",
+    authors: "Yuqiao Tan et al.",
+    arxivId: "2604.14142",
+    tags: ["Reasoning", "RLHF", "Pre-training"],
why: "DSRL: NSR in pre-train space expands reasoning horizon, then standard RL fine-tunes — outperforms all strong RLVR baselines.",
whyZh: "PreRL在预训练空间扩展推理边界,NSR快速剪枝错误路径,DSRL全面超越强基线。",
+    pick: true,
+  },
+  {
+    date: "2026-04-17",
+    title: "LongCoT: Benchmarking Long-Horizon Chain-of-Thought Reasoning",
+    titleZh: "LongCoT:长时域思维链推理基准评测",
+    authors: "Sumeet Ramesh Motwani et al.",
+    arxivId: "2604.14140",
+    tags: ["Reasoning", "Benchmark"],
+    why: "2,500 expert problems requiring up to 100K reasoning tokens — best frontier models score <10%, exposing a major long-horizon gap.",
+    whyZh: "2500道专家题需推理10万token,最优模型不足10%,长链推理缺口巨大。",
+  },
+  {
+    date: "2026-04-17",
+    title: "Calibration-Aware Policy Optimization for Reasoning LLMs",
+    titleZh: "面向推理LLM的校准感知策略优化",
+    authors: "Ziqi Wang et al.",
+    arxivId: "2604.12632",
+    tags: ["Reasoning", "RLHF"],
+    why: "CAPO fixes GRPO's overconfidence via logistic AUC surrogate loss — jointly optimizes calibration and accuracy. ACL 2026.",
+    whyZh: "CAPO通过AUC替代损失修正GRPO过度自信,同时优化准确率与不确定性校准,ACL 2026录用。",
+  },
   {
     date: "2026-04-14",
     title: "SPPO: Sequence-Level PPO for Long-Horizon Reasoning Tasks",
@@ -46,15 +77,10 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Efficient Inference"],
why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
date: "2026-04-13",
date: "2026-04-12",
},
{
date: "2026-04-11",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:基于自然指令强化学习提升LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Curates RLVR data from large instruction-tuning datasets — task selection and micro mixing yield strong general reasoners.",
whyZh: "从指令微调数据筛选RLVR样本,任务选择与微混合策略显著提升LLM通用推理能力。",
titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
@@ -93,8 +119,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Multimodal"],
why: "Direct AR-to-diffusion VLM conversion with KV-cache-compatible parallel decoding — matches AR quality across 11 multimodal benchmarks at lower inference cost.",
whyZh: "直接将自回归VLM转换为块扩散模型,支持KV缓存并行解码,在11项多模态基准上匹配AR质量。",
why: "Trigonometric KV compression exploits Q/K vector concentration — 2.5x throughput or 10.7x KV memory reduction on long reasoning.",
whyZh: "三角级数KV压缩利用Q/K向量集中性,长推理吞吐提升2.5倍或内存减少10.7倍。",
},
{
date: "2026-04-12",
@@ -105,14 +129,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Reasoning", "Efficient Inference"],
why: "Evolutionary merging of reasoning and base models eliminates overthinking — cuts inference cost on easy problems without sacrificing accuracy.",
whyZh: "进化合并推理模型与基础模型,消除过度思考,简单问题推理开销显著降低。",
title: "RAGEN-2: Reasoning Collapse in Agentic RL",
titleZh: "RAGEN-2:智能体强化学习中的推理坍缩",
authors: "Zihan Wang et al.",
arxivId: "2604.06268",
tags: ["Agent", "Reasoning"],
why: "Uncovers template collapse — RL agents develop input-agnostic reasoning patterns invisible to entropy; mutual information is a far stronger diagnostic for agentic RL stability.",
whyZh: "揭示「模板坍缩」:RL智能体产生与输入无关的推理模式,熵无法检测,互信息是更可靠的推理质量诊断指标。",
pick: true,
},
{
date: "2026-04-12",
@@ -154,15 +170,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Agent", "Reasoning", "Theory"],
why: "Information-theoretic proof via Data Processing Inequality: single-agent LLMs are more token-efficient on multi-hop reasoning — reported MAS gains trace to uncontrolled compute.",
whyZh: "通过数据处理不等式证明单智能体在多跳推理上信息效率更高,多智能体系统的性能优势源于未受控的计算量差异。",
date: "2026-04-11",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Data curation framework for RLVR on natural instructions — generalizes RL-driven reasoning beyond math/code to open-ended everyday tasks.",
whyZh: "基于自然指令的RLVR数据策划框架,将强化学习驱动的推理能力从数学/代码拓展至通用任务。",
pick: true,
},
{
date: "2026-04-11",
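Note: the DailyPaper interface named in the first hunk header is collapsed in this diff. A minimal sketch of its shape, inferred only from the fields the entries above actually use — the field comments and the optionality of pick are assumptions, not the source of truth:

// Inferred from usage in this diff; the real interface body in daily.ts is folded above.
export interface DailyPaper {
  date: string;      // ISO date, e.g. "2026-04-17"
  title: string;     // English title
  titleZh: string;   // Chinese title
  authors: string;   // "First Author et al."
  arxivId: string;   // e.g. "2604.14142"
  tags: string[];    // e.g. ["Reasoning", "RLHF"]
  why: string;       // one-line English takeaway
  whyZh: string;     // one-line Chinese takeaway
  pick?: boolean;    // editor's pick; omitted on most entries, so presumably optional
}

// Example consumer, assuming this lives alongside the dailyPapers export:
// newest editor picks first.
const picks = dailyPapers
  .filter((p) => p.pick)
  .sort((a, b) => b.date.localeCompare(a.date));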
2 changes: 1 addition & 1 deletion tsconfig.json
@@ -1,7 +1,7 @@
 {
   "compilerOptions": {
     "target": "es5",
-    "ignoreDeprecations": "6.0",
+    "ignoreDeprecations": "5.0",
     "lib": ["dom", "dom.iterable", "esnext"],
     "allowJs": true,
     "skipLibCheck": true,
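Why this one-line fix matters: "5.0" is the only value current TypeScript releases accept for ignoreDeprecations, so the previous "6.0" made tsc reject the config with an "Invalid value for '--ignoreDeprecations'" error before compiling anything. A minimal sketch of the corrected option in context, assuming the surrounding options are unchanged:

{
  "compilerOptions": {
    "target": "es5",
    // "5.0" suppresses TypeScript 5.x deprecation errors for legacy options;
    // any other string is rejected by the compiler outright.
    "ignoreDeprecations": "5.0"
  }
}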