Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 31 additions & 17 deletions src/lib/daily.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,37 @@ export interface DailyPaper {
}

export const dailyPapers: DailyPaper[] = [
{
date: "2026-04-22",
title: "KnowRL: Boosting LLM Reasoning via Reinforcement Learning with Minimal-Sufficient Knowledge Guidance",
titleZh: "KnowRL:最小充分知识引导的强化学习提升LLM推理",
authors: "et al.",
arxivId: "2604.12627",
tags: ["Reasoning", "RLHF"],
why: "Decomposes guidance into minimal-sufficient atomic knowledge points — slashes RLVR reward sparsity, +9.63 average accuracy across 8 reasoning benchmarks at 1.5B scale.",
whyZh: "将提示分解为最小充分原子知识点,约束子集搜索缓解RLVR奖励稀疏,8项推理基准准确率平均提升9.63点。",
pick: true,
},
{
date: "2026-04-22",
title: "LLM Reasoning Is Latent, Not the Chain of Thought",
titleZh: "LLM推理隐于潜在状态,并非思维链表面",
authors: "Wenshuo Wang",
arxivId: "2604.15726",
tags: ["Reasoning", "Theory"],
why: "Formalizes three hypotheses for LLM reasoning — evidence most supports latent-state trajectories, not surface CoT, as the true reasoning object.",
whyZh: "形式化三种推理假设,现有证据最支持潜在状态轨迹为LLM推理真实载体,而非表面思维链,重塑基准设计方向。",
},
{
date: "2026-04-22",
title: "Demystifying the Unreasonable Effectiveness of Online Alignment Methods",
titleZh: "揭秘在线对齐方法的非凡有效性",
authors: "Enoch Hyunwook Kang",
arxivId: "2604.17207",
tags: ["Alignment", "Theory"],
why: "Proves online RLHF and DPO achieve better temperature-zero regret than KL-based bounds suggest — closes the theory-practice gap in LLM alignment.",
whyZh: "证明在线RLHF和DPO的零温度遗憾界优于KL分析所示,从理论上解释在线对齐方法的强实证表现。",
},
{
date: "2026-04-16",
title: "Introspective Diffusion Language Models",
Expand All @@ -36,15 +67,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Efficient Inference"],
why: "DDTree builds a best-first draft tree from block diffusion per-position distributions — SOTA speculative decoding verified in one target model forward pass.",
whyZh: "DDTree从块扩散逐位置分布构建最优优先草稿树,单次目标模型前向传播完成验证,达到推测解码SOTA。",
date: "2026-04-15",
title: "Introspective Diffusion Language Models",
titleZh: "内省扩散语言模型",
authors: "Yifan Yu et al.",
arxivId: "2604.11035",
tags: ["Diffusion LM", "Reasoning", "Efficient Inference"],
why: "Introspective strided decoding fixes diffusion LM's consistency gap — I-DLM-8B beats LLaDA-2.1-mini (16B) at 2.9–4.1x higher throughput.",
whyZh: "内省跨步解码修复扩散LM一致性缺陷,I-DLM-8B以2.9-4.1倍吞吐超越更大规模LLaDA。",
pick: true,
},
{
date: "2026-04-15",
Expand Down Expand Up @@ -96,14 +118,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Efficient Inference"],
why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
date: "2026-04-12",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.",
whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。",
},
{
date: "2026-04-12",
Expand Down