From 597e068031a5313af0fb4133fa45de266dff3fda Mon Sep 17 00:00:00 2001 From: joyehuang Date: Wed, 11 Feb 2026 18:33:01 +0800 Subject: [PATCH] feat: add papers reading module --- src/components/papers/PaperHero.astro | 121 ++++++++++++++++++ src/components/papers/PaperMeta.astro | 66 ++++++++++ src/components/papers/PaperPreview.astro | 108 ++++++++++++++++ src/content.config.ts | 38 +++++- .../post.mdx | 70 ++++++++++ .../post.mdx | 57 +++++++++ src/layouts/PaperPost.astro | 63 +++++++++ src/pages/index.astro | 18 ++- src/pages/papers/[...id].astro | 29 +++++ src/pages/papers/[...page].astro | 105 +++++++++++++++ src/pages/papers/tags/[tag]/[...page].astro | 72 +++++++++++ src/pages/papers/tags/index.astro | 51 ++++++++ src/site.config.ts | 3 + 13 files changed, 799 insertions(+), 2 deletions(-) create mode 100644 src/components/papers/PaperHero.astro create mode 100644 src/components/papers/PaperMeta.astro create mode 100644 src/components/papers/PaperPreview.astro create mode 100644 src/content/papers/20260211 - attention-is-all-you-need/post.mdx create mode 100644 src/content/papers/20260211 - back-to-basics-let-denoising-models-denoise/post.mdx create mode 100644 src/layouts/PaperPost.astro create mode 100644 src/pages/papers/[...id].astro create mode 100644 src/pages/papers/[...page].astro create mode 100644 src/pages/papers/tags/[tag]/[...page].astro create mode 100644 src/pages/papers/tags/index.astro diff --git a/src/components/papers/PaperHero.astro b/src/components/papers/PaperHero.astro new file mode 100644 index 0000000..79bd264 --- /dev/null +++ b/src/components/papers/PaperHero.astro @@ -0,0 +1,121 @@ +--- +import { Image } from 'astro:assets' +import type { InferEntrySchema } from 'astro:content' + +import { FormattedDate } from 'astro-pure/user' +import { cn } from 'astro-pure/utils' + +interface Props { + data: InferEntrySchema<'papers'> + remarkPluginFrontmatter: Record +} + +const { + data: { + title, + description, + draft, + heroImage, + 
publishDate, + updatedDate, + status, + tags, + language, + comment: enableComment + }, + remarkPluginFrontmatter +} = Astro.props + +const dateTimeOptions: Intl.DateTimeFormatOptions = { + month: 'short' +} + +const statusText = + status === 'reading' ? 'Reading' : status === 'revisit' ? 'Need Revisit' : 'Completed' + +const statusClass = + status === 'reading' + ? 'border-amber-300/70 bg-amber-100/60 text-amber-700 dark:border-amber-700/60 dark:bg-amber-900/30 dark:text-amber-300' + : status === 'revisit' + ? 'border-sky-300/70 bg-sky-100/60 text-sky-700 dark:border-sky-700/60 dark:bg-sky-900/30 dark:text-sky-300' + : 'border-emerald-300/70 bg-emerald-100/60 text-emerald-700 dark:border-emerald-700/60 dark:bg-emerald-900/30 dark:text-emerald-300' +--- + +{ + heroImage && ( +
+ {heroImage.alt +
+ ) +} + +{draft && (Draft)} + +
+
+ + + { + updatedDate && ( + <> + / + + Update + + + + ) + } + + + {remarkPluginFrontmatter.minutesRead} + { + language && ( + <> + + {language} + + ) + } + + {statusText} +
+ +

{title}

+ +
+ {description} +
+ + {!!tags.length && ( +
+ {tags.map((tag) => ( + + #{tag} + + ))} +
+ )} + + {!draft && enableComment && ( +

+ 阅读后欢迎在评论区讨论你对这篇论文的理解和分歧。 +

+ )} +
+ +
diff --git a/src/components/papers/PaperMeta.astro b/src/components/papers/PaperMeta.astro new file mode 100644 index 0000000..bf54916 --- /dev/null +++ b/src/components/papers/PaperMeta.astro @@ -0,0 +1,66 @@ +--- +import type { InferEntrySchema } from 'astro:content' +import { cn } from 'astro-pure/utils' + +interface Props { + data: InferEntrySchema<'papers'> + class?: string +} + +const { + data: { paperLink, pdfLink, codeLink, authors, venue, year }, + class: className +} = Astro.props +--- + +
+

Paper Reference

+ +
+ + Original Link ↗ + + { + pdfLink && ( + + PDF ↗ + + ) + } + { + codeLink && ( + + Code ↗ + + ) + } +
+ +
+

+ Authors: + {authors.join(', ')} +

+

+ Venue: + {venue || 'N/A'} + {' · '} + {year} +

+
+
diff --git a/src/components/papers/PaperPreview.astro b/src/components/papers/PaperPreview.astro new file mode 100644 index 0000000..fae648b --- /dev/null +++ b/src/components/papers/PaperPreview.astro @@ -0,0 +1,108 @@ +--- +import type { CollectionEntry } from 'astro:content' +import { FormattedDate } from 'astro-pure/user' +import { cn } from 'astro-pure/utils' + +interface Props { + paper: CollectionEntry<'papers'> + detailed?: boolean + class?: string +} + +const { paper, detailed = false, class: className } = Astro.props + +const { + id, + data: { title, description, publishDate, updatedDate, status, tags, venue, year, paperLink, draft } +} = paper + +const postDate = updatedDate ?? publishDate + +const statusText = + status === 'reading' ? 'Reading' : status === 'revisit' ? 'Need Revisit' : 'Completed' + +const statusClass = + status === 'reading' + ? 'border-amber-300/70 bg-amber-100/60 text-amber-700 dark:border-amber-700/60 dark:bg-amber-900/30 dark:text-amber-300' + : status === 'revisit' + ? 'border-sky-300/70 bg-sky-100/60 text-sky-700 dark:border-sky-700/60 dark:bg-sky-900/30 dark:text-sky-300' + : 'border-emerald-300/70 bg-emerald-100/60 text-emerald-700 dark:border-emerald-700/60 dark:bg-emerald-900/30 dark:text-emerald-300' +--- + +
  • + +
    + + + {year} + {venue && ( + <> + + {venue} + + )} + + + {statusText} + +
    + +
    +

    + {draft && (Draft) } + {title} +

    + + + + +
    + +

    + {description} +

    +
    + +
    + + Original Paper ↗ + + { + tags.map((tag) => ( + + #{tag} + + )) + } +
    +
  • diff --git a/src/content.config.ts b/src/content.config.ts index 8237059..17cf58f 100644 --- a/src/content.config.ts +++ b/src/content.config.ts @@ -39,4 +39,40 @@ const blog = defineCollection({ }) }) -export const collections = { blog } +const papers = defineCollection({ + // Load Markdown and MDX files in the `src/content/papers/` directory. + loader: glob({ base: './src/content/papers', pattern: '**/*.{md,mdx}' }), + schema: ({ image }) => + z.object({ + // Required + title: z.string().max(120), + description: z.string().max(280), + publishDate: z.coerce.date(), + paperLink: z.string().url(), + authors: z.array(z.string()).min(1), + year: z.number().int(), + // Optional + updatedDate: z.coerce.date().optional(), + venue: z.string().optional(), + pdfLink: z.string().url().optional(), + codeLink: z.string().url().optional(), + heroImage: z + .object({ + src: image(), + alt: z.string().optional(), + inferSize: z.boolean().optional(), + width: z.number().optional(), + height: z.number().optional(), + color: z.string().optional() + }) + .optional(), + tags: z.array(z.string()).default([]).transform(removeDupsAndLowerCase), + status: z.enum(['reading', 'completed', 'revisit']).default('completed'), + language: z.string().optional(), + draft: z.boolean().default(false), + comment: z.boolean().default(true), + featured: z.boolean().default(false) + }) +}) + +export const collections = { blog, papers } diff --git a/src/content/papers/20260211 - attention-is-all-you-need/post.mdx b/src/content/papers/20260211 - attention-is-all-you-need/post.mdx new file mode 100644 index 0000000..004e42e --- /dev/null +++ b/src/content/papers/20260211 - attention-is-all-you-need/post.mdx @@ -0,0 +1,70 @@ +--- +title: Attention Is All You Need 精读 +description: Transformer 论文精读,聚焦 self-attention 设计动机、模型结构、训练细节与影响,并补充我在实现和阅读中的关键笔记。 +publishDate: 2026-02-11 00:00:00 +paperLink: https://arxiv.org/abs/1706.03762 +pdfLink: https://arxiv.org/pdf/1706.03762 +authors: + - Ashish Vaswani + - 
Noam Shazeer + - Niki Parmar + - Jakob Uszkoreit + - Llion Jones + - Aidan N. Gomez + - Lukasz Kaiser + - Illia Polosukhin +venue: NeurIPS +year: 2017 +tags: ['Transformer', 'Attention', 'NLP', 'Foundation'] +status: completed +language: en +featured: true +comment: true +--- + +## Paper Info + +- Original link: [Attention Is All You Need](https://arxiv.org/abs/1706.03762) +- Main topic: sequence modeling without recurrence/convolution +- One-line takeaway: 用全局自注意力替代 RNN/CNN,显著提升并行效率和长程依赖建模能力。 + +## Core Contributions + +1. 提出 `Encoder-Decoder + Multi-Head Self-Attention` 的完整架构。 +2. 用 `Scaled Dot-Product Attention` 稳定训练(除以 `sqrt(d_k)`)。 +3. 通过 `Positional Encoding` 注入位置信息,而非依赖循环结构。 +4. 在机器翻译任务上实现更优质量和更低训练成本。 + +## Method Breakdown + +- Attention 基本形式: + +```text +Attention(Q, K, V) = softmax(QK^T / sqrt(d_k)) V +``` + +- 关键模块组合: + - Multi-Head Attention + - Position-wise FeedForward + - Residual + LayerNorm +- 在实现层面,我最关注两点: + - head 维度切分和拼接是否保持 shape 一致 + - mask 应用顺序是否正确(softmax 前) + +## Experiment Notes + +- 论文强调训练效率:在 GPU 上并行度明显高于 RNN。 +- BLEU 指标提升说明该架构不仅更快,也更有效。 +- Ablation 里 multi-head 的收益很稳定,说明“多视角对齐”确实有效。 + +## My Notes + +- 这篇论文最“工程可迁移”的点:结构简单、扩展性高。 +- 从今天看,它也是后续 GPT/BERT 系列架构的关键起点。 +- 真正读懂它后,再看 modern LLM block 的变化(RoPE/RMSNorm/SwiGLU)会更清晰。 + +## Open Questions & Ideas + +1. 在更长上下文下,注意力复杂度如何进一步优化? +2. 不同位置编码(绝对/相对/旋转)和原版 sinusoidal 的取舍边界是什么? +3. 多头数量增大何时开始出现收益递减? 
diff --git a/src/content/papers/20260211 - back-to-basics-let-denoising-models-denoise/post.mdx b/src/content/papers/20260211 - back-to-basics-let-denoising-models-denoise/post.mdx new file mode 100644 index 0000000..e5d2e70 --- /dev/null +++ b/src/content/papers/20260211 - back-to-basics-let-denoising-models-denoise/post.mdx @@ -0,0 +1,57 @@ +--- +title: 'Back to Basics: Let Denoising Generative Models Denoise 精读' +description: 围绕 Kaiming He 团队这篇 denoising 生成模型论文的结构化阅读笔记,记录核心动机、方法、实验观察和后续待验证的问题。 +publishDate: 2026-02-11 00:00:00 +paperLink: https://arxiv.org/abs/2511.13720 +pdfLink: https://arxiv.org/pdf/2511.13720 +authors: + - Tianhong Li + - Kaiming He +venue: arXiv +year: 2025 +tags: ['Generative Model', 'Diffusion', 'Denoising'] +status: reading +language: en +featured: true +comment: true +--- + +## Paper Info + +- Original link: [Back to Basics: Let Denoising Generative Models Denoise](https://arxiv.org/abs/2511.13720) +- Main topic: denoising generative models 的训练与采样机制重审 +- One-line takeaway: 回到“denoise 本身”这一目标,重新定义模型该优化什么、如何优化。 + +## Core Contributions + +1. 重新审视 denoising 生成模型中的训练目标与噪声建模假设。 +2. 给出更直接贴合 denoise 任务本质的建模方式。 +3. 在实验中验证“回归本质目标”带来的质量和稳定性收益。 + +## Method Breakdown + +- 阅读阶段我重点追踪三件事: + - 目标函数相对传统 diffusion objective 的变化 + - 不同 noise level 下模型行为差异 + - 训练/推理阶段是否保持一致的 denoise 语义 +- 当前结论:该工作试图削减任务定义与优化目标之间的偏差。 + +## Experiment Notes + +- 我会重点复核以下指标: + - 生成质量指标(FID/IS 或任务对应指标) + - 采样步数与质量/速度折中 + - 稳定性与复现敏感性 +- 初步印象:论文强调“简单有效”的路径,而非堆叠复杂技巧。 + +## My Notes + +- 这篇论文对我的启发:很多时候性能瓶颈不是网络结构,而是 objective 对任务本身的错配。 +- 读这类论文时,先盯“优化目标”比先盯“模块创新”更容易抓住本质。 +- 后续我计划做一个小规模复现实验,对比 baseline 与该方法在同数据集上的差异。 + +## Open Questions & Ideas + +1. 该方法在不同数据分布(图像分辨率、类别复杂度)下是否稳健? +2. 当模型规模增大时,收益是放大还是被稀释? +3. 是否能与 consistency / flow matching 的训练范式结合? 
diff --git a/src/layouts/PaperPost.astro b/src/layouts/PaperPost.astro new file mode 100644 index 0000000..5276082 --- /dev/null +++ b/src/layouts/PaperPost.astro @@ -0,0 +1,63 @@ +--- +import type { MarkdownHeading } from 'astro' +import type { CollectionEntry } from 'astro:content' + +import 'katex/dist/katex.min.css' + +import { Comment, MediumZoom } from 'astro-pure/advanced' +import { ArticleBottom, Copyright, TOC } from 'astro-pure/components/pages' +import PaperHero from '@/components/papers/PaperHero.astro' +import PaperMeta from '@/components/papers/PaperMeta.astro' +import PageLayout from '@/layouts/ContentLayout.astro' +import { integ } from '@/site-config' + +interface Props { + paper: CollectionEntry<'papers'> + papers: CollectionEntry<'papers'>[] + headings: MarkdownHeading[] + remarkPluginFrontmatter: Record +} + +const { + paper: { id, data }, + papers, + headings, + remarkPluginFrontmatter +} = Astro.props + +const { description, heroImage, publishDate, title, updatedDate, draft: isDraft, comment } = data + +const socialImage = heroImage + ? typeof heroImage.src === 'string' + ? heroImage.src + : heroImage.src.src + : '/images/social-card.png' + +const articleDate = updatedDate?.toISOString() ?? publishDate.toISOString() +const primaryColor = data.heroImage?.color ?? 
'hsl(var(--primary) / var(--un-text-opacity))' +--- + + + {!!headings.length && } + + + + + + + + + + + + {!isDraft && comment && } + + + + + +{integ.mediumZoom.enable && } diff --git a/src/pages/index.astro b/src/pages/index.astro index b9a4136..f6d0a18 100644 --- a/src/pages/index.astro +++ b/src/pages/index.astro @@ -1,13 +1,14 @@ --- import { Image } from 'astro:assets' +import type { CollectionEntry } from 'astro:content' import avatar from 'src/assets/avatar.png' import { Quote } from 'astro-pure/advanced' import { PostPreview } from 'astro-pure/components/pages' import { getBlogCollection, sortMDByDate } from 'astro-pure/server' import { Button, Card, Icon, Label } from 'astro-pure/user' +import PaperPreview from '@/components/papers/PaperPreview.astro' import PageLayout from '@/layouts/BaseLayout.astro' -import ProjectCard from '@/components/home/ProjectCard.astro' import Section from '@/components/home/Section.astro' import SkillLayout from '@/components/home/SkillLayout.astro' import config from '@/site-config' @@ -20,6 +21,9 @@ const tools = ['Curor', 'Git', 'Docker', 'Postman', 'ESlint/Prettier', 'Jest'] const MAX_POSTS = 10 const allPosts = await getBlogCollection() const allPostsByDate = sortMDByDate(allPosts).slice(0, MAX_POSTS) +const MAX_PAPERS = 5 +const allPapers = (await getBlogCollection('papers')) as CollectionEntry<'papers'>[] +const allPapersByDate = sortMDByDate(allPapers).slice(0, MAX_PAPERS) as CollectionEntry<'papers'>[] --- @@ -106,6 +110,18 @@ const allPostsByDate = sortMDByDate(allPosts).slice(0, MAX_POSTS) ) } + { + allPapersByDate.length > 0 && ( +
    +
      + {allPapersByDate.map((paper) => ( + + ))} +
    +
    + ) + }
    []) + + return papers.map((paper) => ({ + params: { id: paper.id }, + props: { paper, papers } + })) +} + +type Props = { + paper: CollectionEntry<'papers'> + papers: CollectionEntry<'papers'>[] +} + +const { paper, papers } = Astro.props +const { Content, headings, remarkPluginFrontmatter } = await render(paper) +--- + + + + diff --git a/src/pages/papers/[...page].astro b/src/pages/papers/[...page].astro new file mode 100644 index 0000000..a5c46c0 --- /dev/null +++ b/src/pages/papers/[...page].astro @@ -0,0 +1,105 @@ +--- +import type { GetStaticPaths, Page } from 'astro' +import type { CollectionEntry } from 'astro:content' + +import { Paginator } from 'astro-pure/components/pages' +import { getBlogCollection, getUniqueTags, sortMDByDate } from 'astro-pure/server' +import { Button, Icon } from 'astro-pure/user' +import PaperPreview from '@/components/papers/PaperPreview.astro' +import PageLayout from '@/layouts/BaseLayout.astro' +import { paperPageSize } from '@/site-config' + +export const prerender = true + +export const getStaticPaths = (async ({ paginate }) => { + const papers = sortMDByDate((await getBlogCollection('papers')) as CollectionEntry<'papers'>[]) + const uniqueTags = getUniqueTags(papers) + const collectionsCount = papers.length + + return paginate(papers, { + pageSize: paperPageSize, + props: { uniqueTags, collectionsCount } + }) +}) satisfies GetStaticPaths + +interface Props { + page: Page> + uniqueTags: string[] + collectionsCount: number +} + +const { page, uniqueTags, collectionsCount } = Astro.props + +const meta = { + description: 'Paper reading notes, deep dives, and annotations', + title: 'Papers' +} + +const paginationProps = { + ...(page.url.prev && { + prevUrl: { + text: '← Previous Papers', + url: page.url.prev + } + }), + ...(page.url.next && { + nextUrl: { + text: 'Next Papers →', + url: page.url.next + } + }) +} +--- + + + + + ))} + + ) : ( +

    No tags yet.

    + ) + } + + +
    diff --git a/src/site.config.ts b/src/site.config.ts index 0a54cc4..8837445 100644 --- a/src/site.config.ts +++ b/src/site.config.ts @@ -48,6 +48,7 @@ export const theme: ThemeUserConfig = { header: { menu: [ { title: 'Blog', link: '/blog' }, + { title: 'Papers', link: '/papers' }, { title: 'Projects', link: '/projects' }, { title: 'Links', link: '/links' }, { title: 'About', link: '/about' } @@ -83,6 +84,8 @@ export const theme: ThemeUserConfig = { } } +export const paperPageSize = 8 + export const integ: IntegrationUserConfig = { // Links management // See: https://astro-pure.js.org/docs/integrations/links