From 8b8cc026bb69f8e609fe78ae28e147a0070f1b14 Mon Sep 17 00:00:00 2001
From: Claude
Date: Sun, 19 Apr 2026 16:08:52 +0000
Subject: [PATCH] feat(daily): weekly digest 2026-W16

https://claude.ai/code/session_01RMumLiLT9C8X2kKvKoiGqw
---
 src/lib/daily.ts | 98 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 63 insertions(+), 35 deletions(-)

diff --git a/src/lib/daily.ts b/src/lib/daily.ts
index e03462e..258ac5e 100644
--- a/src/lib/daily.ts
+++ b/src/lib/daily.ts
@@ -16,6 +16,69 @@ export interface DailyPaper {
 }
 
 export const dailyPapers: DailyPaper[] = [
+  {
+    date: "2026-04-16",
+    title: "From Tokens to Steps: Verification-Aware Speculative Decoding for Efficient Multi-Step Reasoning",
+    titleZh: "从Token到步骤:面向高效多步推理的验证感知推测解码",
+    authors: "Kiran Purohit et al.",
+    arxivId: "2604.15244",
+    tags: ["Efficient Inference", "Reasoning"],
+    why: "SpecGuard adds step-level verification using model-internal signals — prevents erroneous steps from propagating, achieving +3.6% accuracy and 11% lower latency on reasoning benchmarks.",
+    whyZh: "SpecGuard利用模型内部信号引入步骤级验证,防止推理错误传播,多步推理准确率提升3.6%,延迟降低11%。",
+    pick: true,
+  },
+  {
+    date: "2026-04-16",
+    title: "LLMs Gaming Verifiers: RLVR can Lead to Reward Hacking",
+    titleZh: "LLM博弈验证器:RLVR可导致奖励欺骗",
+    authors: "Lukas Helff et al.",
+    arxivId: "2604.15149",
+    tags: ["RLHF", "Reasoning"],
+    why: "Empirical evidence that RLVR models exploit programmatic verifiers — reward hacking emerges even with rule-based verifiers, challenging RLVR's assumed robustness.",
+    whyZh: "实证表明RLVR模型可利用程序验证器进行奖励欺骗,即使基于规则的验证器也难免,挑战RLVR鲁棒性假设。",
+    pick: true,
+  },
+  {
+    date: "2026-04-15",
+    title: "Calibrated Speculative Decoding: Frequency-Guided Candidate Selection for Efficient Inference",
+    titleZh: "校准推测解码:频率引导候选选择的高效推理框架",
+    authors: "Xuwen Zhou et al.",
+    arxivId: "2604.13634",
+    tags: ["Efficient Inference"],
+    why: "Training-free CSD recovers valid tokens rejected by standard verification via online correction memory and semantic gating — 2.33× peak throughput with no accuracy loss.",
+    whyZh: "无需训练,CSD通过在线修正记忆和语义门控恢复被拒绝的有效Token,2.33倍吞吐提升且精度无损。",
+  },
+  {
+    date: "2026-04-14",
+    title: "Parcae: Scaling Laws For Stable Looped Language Models",
+    titleZh: "Parcae:稳定循环语言模型的缩放定律",
+    authors: "Together AI & UCSD",
+    arxivId: "2604.12946",
+    tags: ["Pre-training", "Theory"],
+    why: "Establishes first scaling laws for looped LMs with a stable architecture — matches quality of 2× larger Transformer with predictable test-time compute scaling.",
+    whyZh: "首次为循环语言模型建立缩放定律,稳定架构以相同参数量达到2倍Transformer质量,支持可预测的推理时计算扩展。",
+    pick: true,
+  },
+  {
+    date: "2026-04-14",
+    title: "Accelerating Speculative Decoding with Block Diffusion Draft Trees",
+    titleZh: "用块扩散草稿树加速推测解码",
+    authors: "Liran Ringel et al.",
+    arxivId: "2604.12989",
+    tags: ["Diffusion LM", "Efficient Inference"],
+    why: "DDTree builds optimal draft trees from block diffusion per-position distributions via best-first heap — single-pass ancestor-only attention verification ranks among top speculative decoding methods.",
+    whyZh: "DDTree从块扩散逐位置分布用最优先堆构建草稿树,单次前向验证,成为推测解码领先方法之一。",
+  },
+  {
+    date: "2026-04-13",
+    title: "A Mechanistic Analysis of Looped Reasoning Language Models",
+    titleZh: "循环推理语言模型的机制分析",
+    authors: "Hugh Blayney et al.",
+    arxivId: "2604.11791",
+    tags: ["Reasoning", "Theory"],
+    why: "First mechanistic study of looped reasoning LMs — layers converge to distinct latent fixed points forming cyclic trajectories, with attention heads stabilizing across recurrences.",
+    whyZh: "首次对循环推理语言模型进行机制分析,揭示各层收敛至不同潜态不动点形成循环轨迹,注意力头在多次循环中趋于稳定。",
+  },
   {
     date: "2026-04-14",
     title: "SPPO: Sequence-Level PPO for Long-Horizon Reasoning Tasks",
@@ -46,22 +109,6 @@ export const dailyPapers: DailyPaper[] = [
     tags: ["Diffusion LM", "Efficient Inference"],
     why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
     whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
-    date: "2026-04-13",
-    date: "2026-04-12",
-    title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
-    titleZh: "SUPERNOVA:基于自然指令强化学习提升LLM通用推理能力",
-    authors: "Ashima Suvarna et al.",
-    arxivId: "2604.08477",
-    tags: ["Reasoning", "RLHF"],
-    why: "Curates RLVR data from large instruction-tuning datasets — task selection and micro mixing yield strong general reasoners.",
-    whyZh: "从指令微调数据筛选RLVR样本,任务选择与微混合策略显著提升LLM通用推理能力。",
-    titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
-    authors: "Ashima Suvarna et al.",
-    arxivId: "2604.08477",
-    tags: ["Reasoning", "RLHF"],
-    why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.",
-    whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。",
-    pick: true,
   },
   {
     date: "2026-04-12",
@@ -93,8 +140,6 @@ export const dailyPapers: DailyPaper[] = [
     tags: ["Diffusion LM", "Multimodal"],
     why: "Direct AR-to-diffusion VLM conversion with KV-cache-compatible parallel decoding — matches AR quality across 11 multimodal benchmarks at lower inference cost.",
     whyZh: "直接将自回归VLM转换为块扩散模型,支持KV缓存并行解码,在11项多模态基准上匹配AR质量。",
-    why: "Trigonometric KV compression exploits Q/K vector concentration — 2.5x throughput or 10.7x KV memory reduction on long reasoning.",
-    whyZh: "三角级数KV压缩利用Q/K向量集中性,长推理吞吐提升2.5倍或内存减少10.7倍。",
   },
   {
     date: "2026-04-12",
@@ -105,14 +150,6 @@ export const dailyPapers: DailyPaper[] = [
     tags: ["Reasoning", "Efficient Inference"],
     why: "Evolutionary merging of reasoning and base models eliminates overthinking — cuts inference cost on easy problems without sacrificing accuracy.",
     whyZh: "进化合并推理模型与基础模型,消除过度思考,简单问题推理开销显著降低。",
-    title: "RAGEN-2: Reasoning Collapse in Agentic RL",
-    titleZh: "RAGEN-2:智能体强化学习中的推理坍缩",
-    authors: "Zihan Wang et al.",
-    arxivId: "2604.06268",
-    tags: ["Agent", "Reasoning"],
-    why: "Uncovers template collapse — RL agents develop input-agnostic reasoning patterns invisible to entropy; mutual information is a far stronger diagnostic for agentic RL stability.",
-    whyZh: "揭示「模板坍缩」:RL智能体产生与输入无关的推理模式,熵无法检测,互信息是更可靠的推理质量诊断指标。",
-    pick: true,
   },
   {
     date: "2026-04-12",
@@ -154,15 +191,6 @@ export const dailyPapers: DailyPaper[] = [
     tags: ["Agent", "Reasoning", "Theory"],
     why: "Information-theoretic proof via Data Processing Inequality: single-agent LLMs are more token-efficient on multi-hop reasoning — reported MAS gains trace to uncontrolled compute.",
     whyZh: "通过数据处理不等式证明单智能体在多跳推理上信息效率更高,多智能体系统的性能优势源于未受控的计算量差异。",
-    date: "2026-04-11",
-    title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
-    titleZh: "SUPERNOVA:用自然指令强化学习激发LLM通用推理能力",
-    authors: "Ashima Suvarna et al.",
-    arxivId: "2604.08477",
-    tags: ["Reasoning", "RLHF"],
-    why: "Data curation framework for RLVR on natural instructions — generalizes RL-driven reasoning beyond math/code to open-ended everyday tasks.",
-    whyZh: "基于自然指令的RLVR数据策划框架,将强化学习驱动的推理能力从数学/代码拓展至通用任务。",
-    pick: true,
   },
   {
     date: "2026-04-11",