Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 31 additions & 35 deletions src/lib/daily.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,37 @@ export interface DailyPaper {
}

export const dailyPapers: DailyPaper[] = [
{
date: "2026-04-21",
title: "LLM Reasoning Is Latent, Not the Chain of Thought",
titleZh: "LLM推理是潜在的,而非思维链",
authors: "Wenshuo Wang",
arxivId: "2604.15726",
tags: ["Theory", "Reasoning"],
why: "Position paper: LLM reasoning lives in latent states, not surface CoT — reshapes how we evaluate interpretability and reasoning benchmarks.",
whyZh: "论证LLM推理本质是潜在状态轨迹而非思维链,重塑推理基准与可解释性研究的核心假设。",
pick: true,
},
{
date: "2026-04-21",
title: "Knowing When to Quit: A Principled Framework for Dynamic Abstention in LLM Reasoning",
titleZh: "适时放弃:LLM推理动态弃权的原则性框架",
authors: "Hen Davidov et al.",
arxivId: "2604.18419",
tags: ["Reasoning", "Efficient Inference"],
why: "RL-based mid-generation abstention terminates bad reasoning traces early — principled value-threshold rule cuts wasted compute.",
whyZh: "RL动态弃权框架在生成中途终止低质推理链,价值阈值规则优于启发式基线,节省推理算力。",
},
{
date: "2026-04-21",
title: "KnowRL: Boosting LLM Reasoning via Reinforcement Learning with Minimal-Sufficient Knowledge Guidance",
titleZh: "KnowRL:基于最小充分知识引导强化学习提升LLM推理",
authors: "Linhao Yu et al.",
arxivId: "2604.12627",
tags: ["Reasoning", "RLHF"],
why: "Decomposes knowledge hints into atomic points to solve reward sparsity in RL — 1.5B model hits 70% on reasoning benchmarks.",
whyZh: "将知识提示分解为原子知识点解决RL奖励稀疏问题,1.5B模型推理基准达70%。",
},
{
date: "2026-04-14",
title: "SPPO: Sequence-Level PPO for Long-Horizon Reasoning Tasks",
Expand Down Expand Up @@ -46,22 +77,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Efficient Inference"],
why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
date: "2026-04-13",
date: "2026-04-12",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:基于自然指令强化学习提升LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Curates RLVR data from large instruction-tuning datasets — task selection and micro mixing yield strong general reasoners.",
whyZh: "从指令微调数据筛选RLVR样本,任务选择与微混合策略显著提升LLM通用推理能力。",
titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.",
whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。",
pick: true,
},
{
date: "2026-04-12",
Expand Down Expand Up @@ -93,8 +108,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Multimodal"],
why: "Direct AR-to-diffusion VLM conversion with KV-cache-compatible parallel decoding — matches AR quality across 11 multimodal benchmarks at lower inference cost.",
whyZh: "直接将自回归VLM转换为块扩散模型,支持KV缓存并行解码,在11项多模态基准上匹配AR质量。",
why: "Trigonometric KV compression exploits Q/K vector concentration — 2.5x throughput or 10.7x KV memory reduction on long reasoning.",
whyZh: "三角级数KV压缩利用Q/K向量集中性,长推理吞吐提升2.5倍或内存减少10.7倍。",
},
{
date: "2026-04-12",
Expand All @@ -105,14 +118,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Reasoning", "Efficient Inference"],
why: "Evolutionary merging of reasoning and base models eliminates overthinking — cuts inference cost on easy problems without sacrificing accuracy.",
whyZh: "进化合并推理模型与基础模型,消除过度思考,简单问题推理开销显著降低。",
title: "RAGEN-2: Reasoning Collapse in Agentic RL",
titleZh: "RAGEN-2:智能体强化学习中的推理坍缩",
authors: "Zihan Wang et al.",
arxivId: "2604.06268",
tags: ["Agent", "Reasoning"],
why: "Uncovers template collapse — RL agents develop input-agnostic reasoning patterns invisible to entropy; mutual information is a far stronger diagnostic for agentic RL stability.",
whyZh: "揭示「模板坍缩」:RL智能体产生与输入无关的推理模式,熵无法检测,互信息是更可靠的推理质量诊断指标。",
pick: true,
},
{
date: "2026-04-12",
Expand Down Expand Up @@ -154,15 +159,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Agent", "Reasoning", "Theory"],
why: "Information-theoretic proof via Data Processing Inequality: single-agent LLMs are more token-efficient on multi-hop reasoning — reported MAS gains trace to uncontrolled compute.",
whyZh: "通过数据处理不等式证明单智能体在多跳推理上信息效率更高,多智能体系统的性能优势源于未受控的计算量差异。",
date: "2026-04-11",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Data curation framework for RLVR on natural instructions — generalizes RL-driven reasoning beyond math/code to open-ended everyday tasks.",
whyZh: "基于自然指令的RLVR数据策划框架,将强化学习驱动的推理能力从数学/代码拓展至通用任务。",
pick: true,
},
{
date: "2026-04-11",
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "es5",
"ignoreDeprecations": "6.0",
"ignoreDeprecations": "5.0",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
Expand Down