hplush · ai · Jun 4, 2025 · Jun 3, 2025 · Jun 3, 2025 · Jun 3, 2025
diff --git a/core/loader/atom.ts b/core/loader/atom.ts
@@ -4,8 +4,9 @@ import { createPostsList } from '../posts-list.ts'
 import type { Loader } from './index.ts'
 import {
   findAnchorHrefs,
+  findDocumentLinks,
+  findHeaderLinks,
   findImageByAttr,
-  findLinksByType,
   isHTML,
   toTime
 } from './utils.ts'
@@ -37,9 +38,12 @@ function parsePosts(text: TextResponse): OriginPost[] {
 
 export const atom: Loader = {
   getMineLinksFromText(text) {
-    if (!isHTML(text)) return []
+    let type = 'application/atom+xml'
+    let headerLinks = findHeaderLinks(text, type)
+    if (!isHTML(text)) return headerLinks
     let links = [
-      ...findLinksByType(text, 'application/atom+xml'),
+      ...headerLinks,
+      ...findDocumentLinks(text, type),
       ...findAnchorHrefs(text, /feeds\.|feed\.|\.atom|\/atom/i, /feed|atom/i)
     ]
     if (links.length > 0) {

diff --git a/core/loader/json-feed.ts b/core/loader/json-feed.ts
@@ -5,7 +5,8 @@ import { createPostsList } from '../posts-list.ts'
 import type { Loader } from './index.ts'
 import {
   findAnchorHrefs,
-  findLinksByType,
+  findDocumentLinks,
+  findHeaderLinks,
   isHTML,
   toTime,
   unique
@@ -131,10 +132,12 @@ function parsePosts(text: TextResponse): OriginPost[] {
 
 export const jsonFeed: Loader = {
   getMineLinksFromText(text) {
-    if (!isHTML(text)) return []
-    let linksByType = findLinksByType(text, 'application/feed+json')
+    let type = 'application/feed+json'
+    let headerLinks = findHeaderLinks(text, type)
+    if (!isHTML(text)) return headerLinks
+    let linksByType = [...headerLinks, ...findDocumentLinks(text, type)]
     if (linksByType.length === 0) {
-      linksByType = findLinksByType(text, 'application/json')
+      linksByType = findDocumentLinks(text, 'application/json')
     }
     return [...linksByType, ...findAnchorHrefs(text, /feed\.json/i)]
   },

diff --git a/core/loader/rss.ts b/core/loader/rss.ts
@@ -4,8 +4,9 @@ import { createPostsList } from '../posts-list.ts'
 import type { Loader } from './index.ts'
 import {
   findAnchorHrefs,
+  findDocumentLinks,
+  findHeaderLinks,
   findImageByAttr,
-  findLinksByType,
   isHTML,
   toTime,
   unique
@@ -48,9 +49,12 @@ function parsePosts(text: TextResponse): OriginPost[] {
 
 export const rss: Loader = {
   getMineLinksFromText(text) {
-    if (!isHTML(text)) return []
+    let type = 'application/rss+xml'
+    let headerLinks = findHeaderLinks(text, type)
+    if (!isHTML(text)) return headerLinks
     return [
-      ...findLinksByType(text, 'application/rss+xml'),
+      ...headerLinks,
+      ...findDocumentLinks(text, type),
       ...findAnchorHrefs(text, /\.rss|\/rss/i, /rss/i)
     ]
   },

diff --git a/core/loader/utils.ts b/core/loader/utils.ts
@@ -30,7 +30,7 @@ function buildFullURL(
   )
 }
 
-export function findLinksByType(text: TextResponse, type: string): string[] {
+export function findDocumentLinks(text: TextResponse, type: string): string[] {
   let document = text.parseXml()
   if (!document) return []
   return [...document.querySelectorAll('link')]
@@ -61,6 +61,34 @@ export function findAnchorHrefs(
     .map(a => buildFullURL(a, text.url))
 }
 
+/**
+ * Returns URLs from the Link HTTP header that have `rel="alternate"` and
+ * the given `type`; returns `[]` if the header is absent. A single entry:
+ * <http://blog.com/?feed=rss2>; rel="alternate"; type="application/rss+xml"
+ * The header can hold several comma-separated entries. URLs can be relative,
+ * in which case they are resolved against the URL of the response.
+ */
+export function findHeaderLinks(
+  response: TextResponse,
+  type: string
+): string[] {
+  let linkHeader = response.headers.get('Link')
+  if (!linkHeader) return []
+  // Split only on commas that start a new `<url>` entry: a comma may also
+  // appear inside a URL, and that must not cut the entry in two.
+  return linkHeader.split(/\s*,\s*(?=<)/).reduce<string[]>((urls, link) => {
+    let [, url] = link.match(/<([^>]*)>/) || []
+    let attributes = link.split(/\s*;\s*/)
+    let matchesType = attributes.includes(`type="${type}"`)
+    let isAlternate = attributes.includes('rel="alternate"')
+    if (url && matchesType && isAlternate) {
+      let isAbsolute = /^https?:\/\//i.test(url)
+      urls.push(isAbsolute ? url : new URL(url, response.url).href)
+    }
+    return urls
+  }, [])
+}
+
 export function toTime(date: null | string | undefined): number | undefined {
   if (!date) return undefined
   let time = new Date(date).getTime() / 1000

diff --git a/core/test/loader/atom.test.ts b/core/test/loader/atom.test.ts
@@ -134,11 +134,15 @@ test('finds atom links in <a> elements', () => {
           </body>
         </html>`,
         {
+          headers: new Headers({
+            Link: '</news/feed>; rel="alternate"; type="application/atom+xml"'
+          }),
           url: 'https://example.com/news'
         }
       )
     ),
     [
+      'https://example.com/news/feed',
       'https://example.com/news/atom',
       'https://example.com/blog/feed.xml',
       'https://example.com/something.atom',

diff --git a/core/test/loader/json-feed.test.ts b/core/test/loader/json-feed.test.ts
@@ -55,11 +55,15 @@ test('detects links', () => {
           </head>
         </html>`,
         {
+          headers: new Headers({
+            Link: '</news/json>; rel="alternate"; type="application/feed+json"'
+          }),
           url: 'https://example.com/news/'
         }
       )
     ),
     [
+      'https://example.com/news/json',
       'https://example.com/a',
       'https://example.com/news/b',
       'https://example.com/c',

diff --git a/core/test/loader/rss.test.ts b/core/test/loader/rss.test.ts
@@ -39,11 +39,15 @@ test('detects links', () => {
           </head>
         </html>`,
         {
+          headers: new Headers({
+            Link: '</news/rss>; rel="alternate"; type="application/rss+xml"'
+          }),
           url: 'https://example.com/news/'
         }
       )
     ),
     [
+      'https://example.com/news/rss',
       'https://example.com/a',
       'https://example.com/news/b',
       'https://example.com/c',

diff --git a/core/test/loader/utils.test.ts b/core/test/loader/utils.test.ts
@@ -0,0 +1,51 @@
+import { deepStrictEqual } from 'node:assert'
+import { test } from 'node:test'
+
+import { createTextResponse } from '../../download.js'
+import { findHeaderLinks } from '../../loader/utils.js'
+
+test('returns urls from link http header', () => {
+  deepStrictEqual(
+    findHeaderLinks(
+      createTextResponse(`<!DOCTYPE html><html><head></head></html>`, {
+        headers: new Headers({
+          Link: '<https://one.example.com>; rel="alternate"; type="application/rss+xml"'
+            + ', <https://two.example.com>; rel="alternate"; type="application/rss+xml"'
+        }),
+        url: 'https://example.com'
+      }),
+      'application/rss+xml'
+    ),
+    ['https://one.example.com', 'https://two.example.com']
+  )
+})
+
+test('handles root-relative urls in http header', () => {
+  deepStrictEqual(
+    findHeaderLinks(
+      createTextResponse(`<!DOCTYPE html><html><head></head></html>`, {
+        headers: new Headers({
+          Link: '</rss>; rel="alternate"; type="application/atom+xml"'
+        }),
+        url: 'https://example.com/blog'
+      }),
+      'application/atom+xml'
+    ),
+    ['https://example.com/rss']
+  )
+})
+
+test('handles relative urls in http header', () => {
+  deepStrictEqual(
+    findHeaderLinks(
+      createTextResponse(`<!DOCTYPE html><html><head></head></html>`, {
+        headers: new Headers({
+          Link: '<./rss>; rel="alternate"; type="application/atom+xml"'
+        }),
+        url: 'https://example.com/blog/'
+      }),
+      'application/atom+xml'
+    ),
+    ['https://example.com/blog/rss']
+  )
+})