From 3afa49146ff76c7e92a0e5e8781bdc9fdc16bec5 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 22 Apr 2026 16:39:41 +0000 Subject: [PATCH] feat(daily): 2026-04-22 digest https://claude.ai/code/session_015EPed7aML4r9hjyCwtBieW --- src/lib/daily.ts | 48 +++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/src/lib/daily.ts b/src/lib/daily.ts index 141f5a9..4546321 100644 --- a/src/lib/daily.ts +++ b/src/lib/daily.ts @@ -16,6 +16,37 @@ export interface DailyPaper { } export const dailyPapers: DailyPaper[] = [ + { + date: "2026-04-22", + title: "KnowRL: Boosting LLM Reasoning via Reinforcement Learning with Minimal-Sufficient Knowledge Guidance", + titleZh: "KnowRL:最小充分知识引导的强化学习提升LLM推理", + authors: "et al.", + arxivId: "2604.12627", + tags: ["Reasoning", "RLHF"], + why: "Decomposes guidance into minimal-sufficient atomic knowledge points — slashes RLVR reward sparsity, +9.63 accuracy on 8 reasoning benchmarks at 1.5B scale.", + whyZh: "将提示分解为最小充分原子知识点,约束子集搜索缓解RLVR奖励稀疏,8项推理基准准确率平均提升9.63点。", + pick: true, + }, + { + date: "2026-04-22", + title: "LLM Reasoning Is Latent, Not the Chain of Thought", + titleZh: "LLM推理隐于潜在状态,并非思维链表面", + authors: "Wenshuo Wang", + arxivId: "2604.15726", + tags: ["Reasoning", "Theory"], + why: "Formalizes three hypotheses for LLM reasoning — evidence most supports latent-state trajectories, not surface CoT, as the true reasoning object.", + whyZh: "形式化三种推理假设,现有证据最支持潜在状态轨迹为LLM推理真实载体,而非表面思维链,重塑基准设计方向。", + }, + { + date: "2026-04-22", + title: "Demystifying the Unreasonable Effectiveness of Online Alignment Methods", + titleZh: "揭秘在线对齐方法的非凡有效性", + authors: "Enoch Hyunwook Kang", + arxivId: "2604.17207", + tags: ["Alignment", "Theory"], + why: "Proves online RLHF and DPO achieve better temperature-zero regret than KL-based bounds suggest — closes the theory-practice gap in LLM alignment.", + whyZh: "证明在线RLHF和DPO的零温度遗憾界优于KL分析所示,从理论上解释在线对齐方法的强实证表现。", + }, { date: "2026-04-16", title: "Introspective Diffusion Language Models", @@ -36,15 +67,6 @@ export const dailyPapers: DailyPaper[] = [ tags: ["Diffusion LM", "Efficient Inference"], why: "DDTree builds a best-first draft tree from block diffusion per-position distributions — SOTA speculative decoding verified in one target model forward pass.", whyZh: "DDTree从块扩散逐位置分布构建最优优先草稿树,单次目标模型前向传播完成验证,达到推测解码SOTA。", - date: "2026-04-15", - title: "Introspective Diffusion Language Models", - titleZh: "内省扩散语言模型", - authors: "Yifan Yu et al.", - arxivId: "2604.11035", - tags: ["Diffusion LM", "Reasoning", "Efficient Inference"], - why: "Introspective strided decoding fixes diffusion LM's consistency gap — I-DLM-8B beats LLaDA-2.1-mini (16B) at 2.9–4.1x higher throughput.", - whyZh: "内省跨步解码修复扩散LM一致性缺陷,I-DLM-8B以2.9-4.1倍吞吐超越更大规模LLaDA。", - pick: true, }, { date: "2026-04-15", @@ -96,14 +118,6 @@ export const dailyPapers: DailyPaper[] = [ tags: ["Diffusion LM", "Efficient Inference"], why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.", whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。", - date: "2026-04-12", - title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions", - titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力", - authors: "Ashima Suvarna et al.", - arxivId: "2604.08477", - tags: ["Reasoning", "RLHF"], - why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.", - whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。", }, { date: "2026-04-12",