From 7d472339f7dffae1c2f33c5bec6d7071eb519bae Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 16:19:38 +0000 Subject: [PATCH] feat(daily): 2026-04-19 digest https://claude.ai/code/session_0158MM7Pqei5atabQHd7AELS --- src/lib/daily.ts | 76 ++++++++++++++++++++---------------------------- tsconfig.json | 2 +- 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/src/lib/daily.ts b/src/lib/daily.ts index e03462e..0371cee 100644 --- a/src/lib/daily.ts +++ b/src/lib/daily.ts @@ -16,6 +16,37 @@ export interface DailyPaper { } export const dailyPapers: DailyPaper[] = [ + { + date: "2026-04-19", + title: "From P(y|x) to P(y): Investigating Reinforcement Learning in Pre-train Space", + titleZh: "从 P(y|x) 到 P(y):在预训练空间中探索强化学习", + authors: "Yuqiao Tan et al.", + arxivId: "2604.14142", + tags: ["Pre-training", "RLHF", "Reasoning"], + why: "PreRL applies RL on marginal P(y) in pre-train space; Dual Space RL combines NSR-PreRL with standard RLVR to expand reasoning horizons.", + whyZh: "在预训练空间对边缘分布P(y)施加RL,双空间RL结合负样本强化与标准RLVR,持续拓展模型推理边界。", + pick: true, + }, + { + date: "2026-04-19", + title: "Parcae: Scaling Laws For Stable Looped Language Models", + titleZh: "Parcae:稳定循环语言模型的缩放定律", + authors: "Hayden Prairie et al.", + arxivId: "2604.12946", + tags: ["Pre-training", "Theory"], + why: "Stable looped architecture matches 87.5% quality of a 2x-larger Transformer — derives power-law FLOP scaling for parameter-efficient LMs.", + whyZh: "稳定循环架构以固定参数达到2倍大Transformer的87.5%质量,推导出参数高效语言模型的幂律FLOP缩放定律。", + }, + { + date: "2026-04-19", + title: "A Mechanistic Analysis of Looped Reasoning Language Models", + titleZh: "循环推理语言模型的机制分析", + authors: "Hugh Blayney et al.", + arxivId: "2604.11791", + tags: ["Reasoning", "Theory"], + why: "Reveals looped LLM layers converge to distinct fixed points per cycle — attention heads stabilize, mechanistically explaining why looping boosts reasoning.", + whyZh: "揭示循环LLM各层收敛至不同不动点,注意力头行为逐轮稳定,从机制层面解释循环为何提升推理能力。", + }, { date: "2026-04-14", title: "SPPO: Sequence-Level PPO for Long-Horizon Reasoning Tasks", @@ -37,32 +68,6 @@ export const dailyPapers: DailyPaper[] = [ why: "Discovers template collapse — a failure mode invisible to entropy in agentic RL — fixed with SNR-aware prompt filtering.", whyZh: "发现模板坍塌这一被熵指标忽视的推理失效模式,提出信噪比滤波策略有效缓解。", }, - { - date: "2026-04-14", - title: "DFlash: Block Diffusion for Flash Speculative Decoding", - titleZh: "DFlash:面向快速推测解码的块扩散方法", - authors: "Jian Chen et al.", - arxivId: "2602.06036", - tags: ["Diffusion LM", "Efficient Inference"], - why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.", - whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。", - date: "2026-04-13", - date: "2026-04-12", - title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions", - titleZh: "SUPERNOVA:基于自然指令强化学习提升LLM通用推理能力", - authors: "Ashima Suvarna et al.", - arxivId: "2604.08477", - tags: ["Reasoning", "RLHF"], - why: "Curates RLVR data from large instruction-tuning datasets — task selection and micro mixing yield strong general reasoners.", - whyZh: "从指令微调数据筛选RLVR样本,任务选择与微混合策略显著提升LLM通用推理能力。", - titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力", - authors: "Ashima Suvarna et al.", - arxivId: "2604.08477", - tags: ["Reasoning", "RLHF"], - why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.", - whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。", - pick: true, - }, { date: "2026-04-12", title: "TriAttention: Efficient Long Reasoning with Trigonometric KV Compression", @@ -93,8 +98,6 @@ export const dailyPapers: DailyPaper[] = [ tags: ["Diffusion LM", "Multimodal"], why: "Direct AR-to-diffusion VLM conversion with KV-cache-compatible parallel decoding — matches AR quality across 11 multimodal benchmarks at lower inference cost.", whyZh: "直接将自回归VLM转换为块扩散模型,支持KV缓存并行解码,在11项多模态基准上匹配AR质量。", - why: "Trigonometric KV compression exploits Q/K vector concentration — 2.5x throughput or 10.7x KV memory reduction on long reasoning.", - whyZh: "三角级数KV压缩利用Q/K向量集中性,长推理吞吐提升2.5倍或内存减少10.7倍。", }, { date: "2026-04-12", @@ -105,14 +108,6 @@ export const dailyPapers: DailyPaper[] = [ tags: ["Reasoning", "Efficient Inference"], why: "Evolutionary merging of reasoning and base models eliminates overthinking — cuts inference cost on easy problems without sacrificing accuracy.", whyZh: "进化合并推理模型与基础模型,消除过度思考,简单问题推理开销显著降低。", - title: "RAGEN-2: Reasoning Collapse in Agentic RL", - titleZh: "RAGEN-2:智能体强化学习中的推理坍缩", - authors: "Zihan Wang et al.", - arxivId: "2604.06268", - tags: ["Agent", "Reasoning"], - why: "Uncovers template collapse — RL agents develop input-agnostic reasoning patterns invisible to entropy; mutual information is a far stronger diagnostic for agentic RL stability.", - whyZh: "揭示「模板坍缩」:RL智能体产生与输入无关的推理模式,熵无法检测,互信息是更可靠的推理质量诊断指标。", - pick: true, }, { date: "2026-04-12", @@ -154,15 +149,6 @@ export const dailyPapers: DailyPaper[] = [ tags: ["Agent", "Reasoning", "Theory"], why: "Information-theoretic proof via Data Processing Inequality: single-agent LLMs are more token-efficient on multi-hop reasoning — reported MAS gains trace to uncontrolled compute.", whyZh: "通过数据处理不等式证明单智能体在多跳推理上信息效率更高,多智能体系统的性能优势源于未受控的计算量差异。", - date: "2026-04-11", - title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions", - titleZh: "SUPERNOVA:用自然指令强化学习激发LLM通用推理能力", - authors: "Ashima Suvarna et al.", - arxivId: "2604.08477", - tags: ["Reasoning", "RLHF"], - why: "Data curation framework for RLVR on natural instructions — generalizes RL-driven reasoning beyond math/code to open-ended everyday tasks.", - whyZh: "基于自然指令的RLVR数据策划框架,将强化学习驱动的推理能力从数学/代码拓展至通用任务。", - pick: true, }, { date: "2026-04-11", diff --git a/tsconfig.json b/tsconfig.json index 2cdebf9..2511cdb 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "target": "es5", - "ignoreDeprecations": "6.0", + "ignoreDeprecations": "5.0", "lib": ["dom", "dom.iterable", "esnext"], "allowJs": true, "skipLibCheck": true,