Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 31 additions & 45 deletions src/lib/daily.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,37 @@ export interface DailyPaper {
}

export const dailyPapers: DailyPaper[] = [
{
date: "2026-04-19",
title: "From P(y|x) to P(y): Investigating Reinforcement Learning in Pre-train Space",
titleZh: "从 P(y|x) 到 P(y):在预训练空间中探索强化学习",
authors: "Yuqiao Tan et al.",
arxivId: "2604.14142",
tags: ["Pre-training", "RLHF", "Reasoning"],
why: "PreRL applies RL on marginal P(y) in pre-train space; Dual Space RL combines NSR-PreRL with standard RLVR to expand reasoning horizons.",
whyZh: "在预训练空间对边缘分布P(y)施加RL,双空间RL结合负样本强化与标准RLVR,持续拓展模型推理边界。",
pick: true,
},
{
date: "2026-04-19",
title: "Parcae: Scaling Laws For Stable Looped Language Models",
titleZh: "Parcae:稳定循环语言模型的缩放定律",
authors: "Hayden Prairie et al.",
arxivId: "2604.12946",
tags: ["Pre-training", "Theory"],
why: "Stable looped architecture matches 87.5% quality of a 2x-larger Transformer — derives power-law FLOP scaling for parameter-efficient LMs.",
whyZh: "稳定循环架构以固定参数达到2倍大Transformer的87.5%质量,推导出参数高效语言模型的幂律FLOP缩放定律。",
},
{
date: "2026-04-19",
title: "A Mechanistic Analysis of Looped Reasoning Language Models",
titleZh: "循环推理语言模型的机制分析",
authors: "Hugh Blayney et al.",
arxivId: "2604.11791",
tags: ["Reasoning", "Theory"],
why: "Reveals looped LLM layers converge to distinct fixed points per cycle — attention heads stabilize, mechanistically explaining why looping boosts reasoning.",
whyZh: "揭示循环LLM各层收敛至不同不动点,注意力头行为逐轮稳定,从机制层面解释循环为何提升推理能力。",
},
{
date: "2026-04-14",
title: "SPPO: Sequence-Level PPO for Long-Horizon Reasoning Tasks",
Expand All @@ -37,32 +68,6 @@ export const dailyPapers: DailyPaper[] = [
why: "Discovers template collapse — a failure mode invisible to entropy in agentic RL — fixed with SNR-aware prompt filtering.",
whyZh: "发现模板坍塌这一被熵指标忽视的推理失效模式,提出信噪比滤波策略有效缓解。",
},
{
date: "2026-04-14",
title: "DFlash: Block Diffusion for Flash Speculative Decoding",
titleZh: "DFlash:面向快速推测解码的块扩散方法",
authors: "Jian Chen et al.",
arxivId: "2602.06036",
tags: ["Diffusion LM", "Efficient Inference"],
why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
date: "2026-04-13",
date: "2026-04-12",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:基于自然指令强化学习提升LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Curates RLVR data from large instruction-tuning datasets — task selection and micro mixing yield strong general reasoners.",
whyZh: "从指令微调数据筛选RLVR样本,任务选择与微混合策略显著提升LLM通用推理能力。",
titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.",
whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。",
pick: true,
},
{
date: "2026-04-12",
title: "TriAttention: Efficient Long Reasoning with Trigonometric KV Compression",
Expand Down Expand Up @@ -93,8 +98,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Multimodal"],
why: "Direct AR-to-diffusion VLM conversion with KV-cache-compatible parallel decoding — matches AR quality across 11 multimodal benchmarks at lower inference cost.",
whyZh: "直接将自回归VLM转换为块扩散模型,支持KV缓存并行解码,在11项多模态基准上匹配AR质量。",
why: "Trigonometric KV compression exploits Q/K vector concentration — 2.5x throughput or 10.7x KV memory reduction on long reasoning.",
whyZh: "三角级数KV压缩利用Q/K向量集中性,长推理吞吐提升2.5倍或内存减少10.7倍。",
},
{
date: "2026-04-12",
Expand All @@ -105,14 +108,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Reasoning", "Efficient Inference"],
why: "Evolutionary merging of reasoning and base models eliminates overthinking — cuts inference cost on easy problems without sacrificing accuracy.",
whyZh: "进化合并推理模型与基础模型,消除过度思考,简单问题推理开销显著降低。",
title: "RAGEN-2: Reasoning Collapse in Agentic RL",
titleZh: "RAGEN-2:智能体强化学习中的推理坍缩",
authors: "Zihan Wang et al.",
arxivId: "2604.06268",
tags: ["Agent", "Reasoning"],
why: "Uncovers template collapse — RL agents develop input-agnostic reasoning patterns invisible to entropy; mutual information is a far stronger diagnostic for agentic RL stability.",
whyZh: "揭示「模板坍缩」:RL智能体产生与输入无关的推理模式,熵无法检测,互信息是更可靠的推理质量诊断指标。",
pick: true,
},
{
date: "2026-04-12",
Expand Down Expand Up @@ -154,15 +149,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Agent", "Reasoning", "Theory"],
why: "Information-theoretic proof via Data Processing Inequality: single-agent LLMs are more token-efficient on multi-hop reasoning — reported MAS gains trace to uncontrolled compute.",
whyZh: "通过数据处理不等式证明单智能体在多跳推理上信息效率更高,多智能体系统的性能优势源于未受控的计算量差异。",
date: "2026-04-11",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Data curation framework for RLVR on natural instructions — generalizes RL-driven reasoning beyond math/code to open-ended everyday tasks.",
whyZh: "基于自然指令的RLVR数据策划框架,将强化学习驱动的推理能力从数学/代码拓展至通用任务。",
pick: true,
},
{
date: "2026-04-11",
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "es5",
"ignoreDeprecations": "6.0",
"ignoreDeprecations": "5.0",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
Expand Down