Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 23 additions & 36 deletions src/lib/daily.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,27 @@ export interface DailyPaper {
}

export const dailyPapers: DailyPaper[] = [
{
date: "2026-04-20",
title: "From P(y|x) to P(y): Investigating Reinforcement Learning in Pre-train Space",
titleZh: "从P(y|x)到P(y):探索预训练空间中的强化学习",
authors: "Authors et al.",
arxivId: "2604.14142",
tags: ["Reasoning", "Pre-training", "RLHF"],
why: "PreRL applies RL in pre-train space P(y) without conditioning; NSR variant prunes wrong reasoning and boosts reflective steps 14.89×.",
whyZh: "PreRL直接在预训练空间P(y)中施加RL更新;NSR变体剪枝错误推理,反思步骤增加14.89倍。",
pick: true,
},
{
date: "2026-04-20",
title: "A Mechanistic Analysis of Looped Reasoning Language Models",
titleZh: "循环推理语言模型的机制性分析",
authors: "Authors et al.",
arxivId: "2604.11791",
tags: ["Reasoning", "Theory"],
why: "Dissects internal inference dynamics of layer-looped LLMs — shows how iterative computation reshapes reasoning stages vs. standard feedforward models.",
whyZh: "机制性分析层循环LLM的推理动态,揭示迭代层复用如何改变推理阶段,有别于标准前馈模型。",
},
{
date: "2026-04-14",
title: "SPPO: Sequence-Level PPO for Long-Horizon Reasoning Tasks",
Expand All @@ -37,32 +58,6 @@ export const dailyPapers: DailyPaper[] = [
why: "Discovers template collapse — a failure mode invisible to entropy in agentic RL — fixed with SNR-aware prompt filtering.",
whyZh: "发现模板坍塌这一被熵指标忽视的推理失效模式,提出信噪比滤波策略有效缓解。",
},
{
date: "2026-04-14",
title: "DFlash: Block Diffusion for Flash Speculative Decoding",
titleZh: "DFlash:面向快速推测解码的块扩散方法",
authors: "Jian Chen et al.",
arxivId: "2602.06036",
tags: ["Diffusion LM", "Efficient Inference"],
why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
date: "2026-04-13",
date: "2026-04-12",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:基于自然指令强化学习提升LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Curates RLVR data from large instruction-tuning datasets — task selection and micro mixing yield strong general reasoners.",
whyZh: "从指令微调数据筛选RLVR样本,任务选择与微混合策略显著提升LLM通用推理能力。",
titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.",
whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。",
pick: true,
},
{
date: "2026-04-12",
title: "TriAttention: Efficient Long Reasoning with Trigonometric KV Compression",
Expand Down Expand Up @@ -93,8 +88,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Multimodal"],
why: "Direct AR-to-diffusion VLM conversion with KV-cache-compatible parallel decoding — matches AR quality across 11 multimodal benchmarks at lower inference cost.",
whyZh: "直接将自回归VLM转换为块扩散模型,支持KV缓存并行解码,在11项多模态基准上匹配AR质量。",
why: "Trigonometric KV compression exploits Q/K vector concentration — 2.5x throughput or 10.7x KV memory reduction on long reasoning.",
whyZh: "三角级数KV压缩利用Q/K向量集中性,长推理吞吐提升2.5倍或内存减少10.7倍。",
},
{
date: "2026-04-12",
Expand All @@ -105,14 +98,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Reasoning", "Efficient Inference"],
why: "Evolutionary merging of reasoning and base models eliminates overthinking — cuts inference cost on easy problems without sacrificing accuracy.",
whyZh: "进化合并推理模型与基础模型,消除过度思考,简单问题推理开销显著降低。",
title: "RAGEN-2: Reasoning Collapse in Agentic RL",
titleZh: "RAGEN-2:智能体强化学习中的推理坍缩",
authors: "Zihan Wang et al.",
arxivId: "2604.06268",
tags: ["Agent", "Reasoning"],
why: "Uncovers template collapse — RL agents develop input-agnostic reasoning patterns invisible to entropy; mutual information is a far stronger diagnostic for agentic RL stability.",
whyZh: "揭示「模板坍缩」:RL智能体产生与输入无关的推理模式,熵无法检测,互信息是更可靠的推理质量诊断指标。",
pick: true,
},
{
date: "2026-04-12",
Expand Down Expand Up @@ -154,6 +139,8 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Agent", "Reasoning", "Theory"],
why: "Information-theoretic proof via Data Processing Inequality: single-agent LLMs are more token-efficient on multi-hop reasoning — reported MAS gains trace to uncontrolled compute.",
whyZh: "通过数据处理不等式证明单智能体在多跳推理上信息效率更高,多智能体系统的性能优势源于未受控的计算量差异。",
},
{
date: "2026-04-11",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:用自然指令强化学习激发LLM通用推理能力",
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "es5",
"ignoreDeprecations": "6.0",
"ignoreDeprecations": "5.0",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
Expand Down