Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 31 additions & 17 deletions src/lib/daily.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,37 @@ export interface DailyPaper {
}

export const dailyPapers: DailyPaper[] = [
{
date: "2026-04-22",
title: "KnowRL: Boosting LLM Reasoning via Reinforcement Learning with Minimal-Sufficient Knowledge Guidance",
titleZh: "KnowRL:最小充分知识引导的强化学习提升LLM推理",
authors: "et al.",
arxivId: "2604.12627",
tags: ["Reasoning", "RLHF"],
why: "Decomposes guidance into minimal-sufficient atomic knowledge points — slashes RLVR reward sparsity, +9.63 average accuracy across 8 reasoning benchmarks at 1.5B scale.",
whyZh: "将提示分解为最小充分原子知识点,约束子集搜索缓解RLVR奖励稀疏,8项推理基准准确率平均提升9.63点。",
pick: true,
},
{
date: "2026-04-22",
title: "LLM Reasoning Is Latent, Not the Chain of Thought",
titleZh: "LLM推理隐于潜在状态,并非思维链表面",
authors: "Wenshuo Wang",
arxivId: "2604.15726",
tags: ["Reasoning", "Theory"],
why: "Formalizes three hypotheses for LLM reasoning — evidence most supports latent-state trajectories, not surface CoT, as the true reasoning object.",
whyZh: "形式化三种推理假设,现有证据最支持潜在状态轨迹为LLM推理真实载体,而非表面思维链,重塑基准设计方向。",
},
{
date: "2026-04-22",
title: "Demystifying the Unreasonable Effectiveness of Online Alignment Methods",
titleZh: "揭秘在线对齐方法的非凡有效性",
authors: "Enoch Hyunwook Kang",
arxivId: "2604.17207",
tags: ["Alignment", "Theory"],
why: "Proves online RLHF and DPO achieve better temperature-zero regret than KL-based bounds suggest — closes the theory-practice gap in LLM alignment.",
whyZh: "证明在线RLHF和DPO的零温度遗憾界优于KL分析所示,从理论上解释在线对齐方法的强实证表现。",
},
{
date: "2026-04-16",
title: "Introspective Diffusion Language Models",
Expand All @@ -36,15 +67,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Efficient Inference"],
why: "DDTree builds a best-first draft tree from block diffusion per-position distributions — SOTA speculative decoding verified in one target model forward pass.",
whyZh: "DDTree从块扩散逐位置分布构建最优优先草稿树,单次目标模型前向传播完成验证,达到推测解码SOTA。",
date: "2026-04-15",
title: "Introspective Diffusion Language Models",
titleZh: "内省扩散语言模型",
authors: "Yifan Yu et al.",
arxivId: "2604.11035",
tags: ["Diffusion LM", "Reasoning", "Efficient Inference"],
why: "Introspective strided decoding fixes diffusion LM's consistency gap — I-DLM-8B beats LLaDA-2.1-mini (16B) at 2.9–4.1x higher throughput.",
whyZh: "内省跨步解码修复扩散LM一致性缺陷,I-DLM-8B以2.9-4.1倍吞吐超越更大规模LLaDA。",
pick: true,
},
{
date: "2026-04-15",
Expand Down Expand Up @@ -96,14 +118,6 @@ export const dailyPapers: DailyPaper[] = [
tags: ["Diffusion LM", "Efficient Inference"],
why: "Block diffusion drafter achieves 6x lossless speedup over base LLM — 2.5x faster than EAGLE-3 with no quality loss.",
whyZh: "用块扩散模型作为推测解码草稿器,实现6倍无损加速,比EAGLE-3快2.5倍。",
date: "2026-04-12",
title: "SUPERNOVA: Eliciting General Reasoning in LLMs with Reinforcement Learning on Natural Instructions",
titleZh: "SUPERNOVA:利用自然指令强化学习激发LLM通用推理能力",
authors: "Ashima Suvarna et al.",
arxivId: "2604.08477",
tags: ["Reasoning", "RLHF"],
why: "Mines natural instruction datasets for verifiable rewards — extends RLVR beyond math/code to causal, temporal, and abductive reasoning without hand-crafted reward functions.",
whyZh: "从自然指令数据集中挖掘可验证奖励,将RLVR扩展至因果、时序和溯因推理,无需手工设计奖励函数。",
},
{
date: "2026-04-12",
Expand Down