61 changes: 34 additions & 27 deletions src/lib/daily.ts
@@ -16,6 +16,37 @@ export interface DailyPaper {
 }
 
 export const dailyPapers: DailyPaper[] = [
+  {
+    date: "2026-04-17",
+    title: "From P(y|x) to P(y): Investigating Reinforcement Learning in Pre-train Space",
+    titleZh: "从P(y|x)到P(y):在预训练空间探索强化学习",
+    authors: "Yuqiao Tan et al.",
+    arxivId: "2604.14142",
+    tags: ["Reasoning", "RLHF", "Pre-training"],
why: "DSRL: NSR in pre-train space expands reasoning horizon, then standard RL fine-tunes — outperforms all strong RLVR baselines.",
whyZh: "PreRL在预训练空间扩展推理边界,NSR快速剪枝错误路径,DSRL全面超越强基线。",
+    pick: true,
+  },
+  {
+    date: "2026-04-17",
+    title: "LongCoT: Benchmarking Long-Horizon Chain-of-Thought Reasoning",
+    titleZh: "LongCoT:长时域思维链推理基准评测",
+    authors: "Sumeet Ramesh Motwani et al.",
+    arxivId: "2604.14140",
+    tags: ["Reasoning", "Benchmark"],
+    why: "2,500 expert problems requiring up to 100K reasoning tokens — best frontier models score <10%, exposing a major long-horizon gap.",
+    whyZh: "2500道专家题需推理10万token,最优模型不足10%,长链推理缺口巨大。",
+  },
+  {
+    date: "2026-04-17",
+    title: "Calibration-Aware Policy Optimization for Reasoning LLMs",
+    titleZh: "面向推理LLM的校准感知策略优化",
+    authors: "Ziqi Wang et al.",
+    arxivId: "2604.12632",
+    tags: ["Reasoning", "RLHF"],
+    why: "CAPO fixes GRPO's overconfidence via logistic AUC surrogate loss — jointly optimizes calibration and accuracy. ACL 2026.",
+    whyZh: "CAPO通过AUC替代损失修正GRPO过度自信,同时优化准确率与不确定性校准,ACL 2026录用。",
+  },
   {
     date: "2026-04-14",
     title: "SPPO: Sequence-Level PPO for Long-Horizon Reasoning Tasks",
@@ -46,15 +77,10 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Efficient Inference"],
why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
date: "2026-04-13",
date: "2026-04-12",
},
{
date: "2026-04-11",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:基于自然指令强化学习提升LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Curates RLVR data from large instruction-tuning datasets — task selection and micro mixing yield strong general reasoners.",
whyZh: "从指令微调数据筛选RLVR样本,任务选择与微混合策略显著提升LLM通用推理能力。",
titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
@@ -93,8 +119,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Multimodal"],
why: "Direct AR-to-diffusion VLM conversion with KV-cache-compatible parallel decoding — matches AR quality across 11 multimodal benchmarks at lower inference cost.",
whyZh: "直接将自回归VLM转换为块扩散模型,支持KV缓存并行解码,在11项多模态基准上匹配AR质量。",
why: "Trigonometric KV compression exploits Q/K vector concentration — 2.5x throughput or 10.7x KV memory reduction on long reasoning.",
whyZh: "三角级数KV压缩利用Q/K向量集中性,长推理吞吐提升2.5倍或内存减少10.7倍。",
},
{
date: "2026-04-12",
@@ -105,14 +129,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Reasoning", "Efficient Inference"],
why: "Evolutionary merging of reasoning and base models eliminates overthinking — cuts inference cost on easy problems without sacrificing accuracy.",
whyZh: "进化合并推理模型与基础模型,消除过度思考,简单问题推理开销显著降低。",
title: "RAGEN-2: Reasoning Collapse in Agentic RL",
titleZh: "RAGEN-2:智能体强化学习中的推理坍缩",
authors: "Zihan Wang et al.",
arxivId: "2604.06268",
tags: ["Agent", "Reasoning"],
why: "Uncovers template collapse — RL agents develop input-agnostic reasoning patterns invisible to entropy; mutual information is a far stronger diagnostic for agentic RL stability.",
whyZh: "揭示「模板坍缩」:RL智能体产生与输入无关的推理模式,熵无法检测,互信息是更可靠的推理质量诊断指标。",
pick: true,
},
{
date: "2026-04-12",
@@ -154,15 +170,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Agent", "Reasoning", "Theory"],
why: "Information-theoretic proof via Data Processing Inequality: single-agent LLMs are more token-efficient on multi-hop reasoning — reported MAS gains trace to uncontrolled compute.",
whyZh: "通过数据处理不等式证明单智能体在多跳推理上信息效率更高,多智能体系统的性能优势源于未受控的计算量差异。",
date: "2026-04-11",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Data curation framework for RLVR on natural instructions — generalizes RL-driven reasoning beyond math/code to open-ended everyday tasks.",
whyZh: "基于自然指令的RLVR数据策划框架,将强化学习驱动的推理能力从数学/代码拓展至通用任务。",
pick: true,
},
{
date: "2026-04-11",
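Note: the DailyPaper interface named in the first hunk header is collapsed in this diff. A minimal sketch of its shape, inferred only from the fields the entries above actually use — the field comments and the optionality of pick are assumptions, not the source of truth:

// Inferred from usage in this diff; the real interface body in daily.ts is folded above.
export interface DailyPaper {
  date: string;      // ISO date, e.g. "2026-04-17"
  title: string;     // English title
  titleZh: string;   // Chinese title
  authors: string;   // "First Author et al."
  arxivId: string;   // e.g. "2604.14142"
  tags: string[];    // e.g. ["Reasoning", "RLHF"]
  why: string;       // one-line English takeaway
  whyZh: string;     // one-line Chinese takeaway
  pick?: boolean;    // editor's pick; omitted on most entries, so presumably optional
}

// Example consumer, assuming this lives alongside the dailyPapers export:
// newest editor picks first.
const picks = dailyPapers
  .filter((p) => p.pick)
  .sort((a, b) => b.date.localeCompare(a.date));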
2 changes: 1 addition & 1 deletion tsconfig.json
@@ -1,7 +1,7 @@
 {
   "compilerOptions": {
     "target": "es5",
-    "ignoreDeprecations": "6.0",
+    "ignoreDeprecations": "5.0",
     "lib": ["dom", "dom.iterable", "esnext"],
     "allowJs": true,
     "skipLibCheck": true,
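Why this one-line fix matters: "5.0" is the only value current TypeScript releases accept for ignoreDeprecations, so the previous "6.0" made tsc reject the config with an "Invalid value for '--ignoreDeprecations'" error before compiling anything. A minimal sketch of the corrected option in context, assuming the surrounding options are unchanged:

{
  "compilerOptions": {
    "target": "es5",
    // "5.0" suppresses TypeScript 5.x deprecation errors for legacy options;
    // any other string is rejected by the compiler outright.
    "ignoreDeprecations": "5.0"
  }
}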