98 changes: 63 additions & 35 deletions src/lib/daily.ts
@@ -16,6 +16,69 @@ export interface DailyPaper {
}

export const dailyPapers: DailyPaper[] = [
{
date: "2026-04-16",
title: "From Tokens to Steps: Verification-Aware Speculative Decoding for Efficient Multi-Step Reasoning",
titleZh: "从Token到步骤:面向高效多步推理的验证感知推测解码",
authors: "Kiran Purohit et al.",
arxivId: "2604.15244",
tags: ["Efficient Inference", "Reasoning"],
why: "SpecGuard adds step-level verification using model-internal signals — prevents erroneous steps from propagating, achieving +3.6% accuracy and 11% lower latency on reasoning benchmarks.",
whyZh: "SpecGuard利用模型内部信号引入步骤级验证,防止推理错误传播,多步推理准确率提升3.6%,延迟降低11%。",
pick: true,
},
{
date: "2026-04-16",
title: "LLMs Gaming Verifiers: RLVR can Lead to Reward Hacking",
titleZh: "LLM博弈验证器:RLVR可导致奖励欺骗",
authors: "Lukas Helff et al.",
arxivId: "2604.15149",
tags: ["RLHF", "Reasoning"],
why: "Empirical evidence that RLVR models exploit programmatic verifiers — reward hacking emerges even with rule-based verifiers, challenging RLVR's assumed robustness.",
whyZh: "实证表明RLVR模型可利用程序验证器进行奖励欺骗,即使基于规则的验证器也难免,挑战RLVR鲁棒性假设。",
pick: true,
},
{
date: "2026-04-15",
title: "Calibrated Speculative Decoding: Frequency-Guided Candidate Selection for Efficient Inference",
titleZh: "校准推测解码:频率引导候选选择的高效推理框架",
authors: "Xuwen Zhou et al.",
arxivId: "2604.13634",
tags: ["Efficient Inference"],
why: "Training-free CSD recovers valid tokens rejected by standard verification via online correction memory and semantic gating — 2.33× peak throughput with no accuracy loss.",
whyZh: "无需训练,CSD通过在线修正记忆和语义门控恢复被拒绝的有效Token,2.33倍吞吐提升且精度无损。",
},
{
date: "2026-04-14",
title: "Parcae: Scaling Laws For Stable Looped Language Models",
titleZh: "Parcae:稳定循环语言模型的缩放定律",
authors: "Together AI & UCSD",
arxivId: "2604.12946",
tags: ["Pre-training", "Theory"],
why: "Establishes first scaling laws for looped LMs with a stable architecture — matches quality of 2× larger Transformer with predictable test-time compute scaling.",
whyZh: "首次为循环语言模型建立缩放定律,稳定架构以相同参数量达到2倍Transformer质量,支持可预测的推理时计算扩展。",
pick: true,
},
{
date: "2026-04-14",
title: "Accelerating Speculative Decoding with Block Diffusion Draft Trees",
titleZh: "用块扩散草稿树加速推测解码",
authors: "Liran Ringel et al.",
arxivId: "2604.12989",
tags: ["Diffusion LM", "Efficient Inference"],
why: "DDTree builds optimal draft trees from block diffusion per-position distributions via best-first heap — single-pass ancestor-only attention verification ranks among top speculative decoding methods.",
whyZh: "DDTree从块扩散逐位置分布用最优先堆构建草稿树,单次前向验证,成为推测解码领先方法之一。",
},
{
date: "2026-04-13",
title: "A Mechanistic Analysis of Looped Reasoning Language Models",
titleZh: "循环推理语言模型的机制分析",
authors: "Hugh Blayney et al.",
arxivId: "2604.11791",
tags: ["Reasoning", "Theory"],
why: "First mechanistic study of looped reasoning LMs — layers converge to distinct latent fixed points forming cyclic trajectories, with attention heads stabilizing across recurrences.",
whyZh: "首次对循环推理语言模型进行机制分析,揭示各层收敛至不同潜态不动点形成循环轨迹,注意力头在多次循环中趋于稳定。",
},
{
date: "2026-04-14",
title: "SPPO: Sequence-Level PPO for Long-Horizon Reasoning Tasks",
@@ -46,22 +109,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Efficient Inference"],
why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
date: "2026-04-13",
date: "2026-04-12",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:基于自然指令强化学习提升LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Curates RLVR data from large instruction-tuning datasets — task selection and micro mixing yield strong general reasoners.",
whyZh: "从指令微调数据筛选RLVR样本,任务选择与微混合策略显著提升LLM通用推理能力。",
titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.",
whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。",
pick: true,
},
{
date: "2026-04-12",
@@ -93,8 +140,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Multimodal"],
why: "Direct AR-to-diffusion VLM conversion with KV-cache-compatible parallel decoding — matches AR quality across 11 multimodal benchmarks at lower inference cost.",
whyZh: "直接将自回归VLM转换为块扩散模型,支持KV缓存并行解码,在11项多模态基准上匹配AR质量。",
why: "Trigonometric KV compression exploits Q/K vector concentration — 2.5x throughput or 10.7x KV memory reduction on long reasoning.",
whyZh: "三角级数KV压缩利用Q/K向量集中性,长推理吞吐提升2.5倍或内存减少10.7倍。",
},
{
date: "2026-04-12",
@@ -105,14 +150,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Reasoning", "Efficient Inference"],
why: "Evolutionary merging of reasoning and base models eliminates overthinking — cuts inference cost on easy problems without sacrificing accuracy.",
whyZh: "进化合并推理模型与基础模型,消除过度思考,简单问题推理开销显著降低。",
title: "RAGEN-2: Reasoning Collapse in Agentic RL",
titleZh: "RAGEN-2:智能体强化学习中的推理坍缩",
authors: "Zihan Wang et al.",
arxivId: "2604.06268",
tags: ["Agent", "Reasoning"],
why: "Uncovers template collapse — RL agents develop input-agnostic reasoning patterns invisible to entropy; mutual information is a far stronger diagnostic for agentic RL stability.",
whyZh: "揭示「模板坍缩」:RL智能体产生与输入无关的推理模式,熵无法检测,互信息是更可靠的推理质量诊断指标。",
pick: true,
},
{
date: "2026-04-12",
@@ -154,15 +191,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Agent", "Reasoning", "Theory"],
why: "Information-theoretic proof via Data Processing Inequality: single-agent LLMs are more token-efficient on multi-hop reasoning — reported MAS gains trace to uncontrolled compute.",
whyZh: "通过数据处理不等式证明单智能体在多跳推理上信息效率更高,多智能体系统的性能优势源于未受控的计算量差异。",
date: "2026-04-11",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Data curation framework for RLVR on natural instructions — generalizes RL-driven reasoning beyond math/code to open-ended everyday tasks.",
whyZh: "基于自然指令的RLVR数据策划框架,将强化学习驱动的推理能力从数学/代码拓展至通用任务。",
pick: true,
},
{
date: "2026-04-11",
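For orientation, the entries added and removed in this diff all appear to follow one shape. The body of the DailyPaper interface is collapsed in the first hunk, so the sketch below is inferred purely from the fields visible above; the exact types and the optionality of pick are assumptions, not the file's actual declaration.

// Sketch of the DailyPaper shape as inferred from the entries in this diff.
// Not copied from src/lib/daily.ts; the interface body is not shown above.
export interface DailyPaper {
  date: string;      // ISO date, e.g. "2026-04-16"
  title: string;     // English title
  titleZh: string;   // Chinese title
  authors: string;   // e.g. "Kiran Purohit et al."
  arxivId: string;   // e.g. "2604.15244"
  tags: string[];    // e.g. ["Efficient Inference", "Reasoning"]
  why: string;       // one-line English rationale for including the paper
  whyZh: string;     // one-line Chinese rationale
  pick?: boolean;    // several entries omit it, so it is presumably optional
}

A consumer of this module would presumably select the highlighted papers with something like dailyPapers.filter(p => p.pick), though no such call site appears in this diff.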