Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions core/loader/atom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import { createPostsList } from '../posts-list.ts'
import type { Loader } from './index.ts'
import {
findAnchorHrefs,
findDocumentLinks,
findHeaderLinks,
findImageByAttr,
findLinksByType,
isHTML,
toTime
} from './utils.ts'
Expand Down Expand Up @@ -37,9 +38,12 @@ function parsePosts(text: TextResponse): OriginPost[] {

export const atom: Loader = {
getMineLinksFromText(text) {
if (!isHTML(text)) return []
let type = 'application/atom+xml'
let headerLinks = findHeaderLinks(text, type)
if (!isHTML(text)) return headerLinks
let links = [
...findLinksByType(text, 'application/atom+xml'),
...headerLinks,
...findDocumentLinks(text, type),
...findAnchorHrefs(text, /feeds\.|feed\.|\.atom|\/atom/i, /feed|atom/i)
]
if (links.length > 0) {
Expand Down
11 changes: 7 additions & 4 deletions core/loader/json-feed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import { createPostsList } from '../posts-list.ts'
import type { Loader } from './index.ts'
import {
findAnchorHrefs,
findLinksByType,
findDocumentLinks,
findHeaderLinks,
isHTML,
toTime,
unique
Expand Down Expand Up @@ -131,10 +132,12 @@ function parsePosts(text: TextResponse): OriginPost[] {

export const jsonFeed: Loader = {
getMineLinksFromText(text) {
if (!isHTML(text)) return []
let linksByType = findLinksByType(text, 'application/feed+json')
let type = 'application/feed+json'
let headerLinks = findHeaderLinks(text, type)
if (!isHTML(text)) return headerLinks
let linksByType = [...headerLinks, ...findDocumentLinks(text, type)]
if (linksByType.length === 0) {
linksByType = findLinksByType(text, 'application/json')
linksByType = findDocumentLinks(text, 'application/json')
}
return [...linksByType, ...findAnchorHrefs(text, /feed\.json/i)]
},
Expand Down
10 changes: 7 additions & 3 deletions core/loader/rss.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import { createPostsList } from '../posts-list.ts'
import type { Loader } from './index.ts'
import {
findAnchorHrefs,
findDocumentLinks,
findHeaderLinks,
findImageByAttr,
findLinksByType,
isHTML,
toTime,
unique
Expand Down Expand Up @@ -48,9 +49,12 @@ function parsePosts(text: TextResponse): OriginPost[] {

export const rss: Loader = {
getMineLinksFromText(text) {
if (!isHTML(text)) return []
let type = 'application/rss+xml'
let headerLinks = findHeaderLinks(text, type)
if (!isHTML(text)) return headerLinks
return [
...findLinksByType(text, 'application/rss+xml'),
...headerLinks,
...findDocumentLinks(text, type),
...findAnchorHrefs(text, /\.rss|\/rss/i, /rss/i)
]
},
Expand Down
30 changes: 29 additions & 1 deletion core/loader/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ function buildFullURL(
)
Comment thread
ashlkv marked this conversation as resolved.
}

export function findLinksByType(text: TextResponse, type: string): string[] {
export function findDocumentLinks(text: TextResponse, type: string): string[] {
let document = text.parseXml()
if (!document) return []
return [...document.querySelectorAll('link')]
Expand Down Expand Up @@ -61,6 +61,34 @@ export function findAnchorHrefs(
.map(a => buildFullURL(a, text.url))
}

/**
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very good comment. But we don’t use this format in other functions.

Can we keep the old comment-less format in this PR?

But I am open to discuss adding more comments (just prefer to have some policy).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's consider adding more comments. Otherwise it's hard to understand what functions do, even when looking at the tests, especially for newcomers, and especially now that there is no UI.

* Returns an array of URLs found in the Link HTTP header of a response,
* or an empty array if the header is absent. Only links marked
* rel="alternate" whose type matches the requested one are returned.
* An example of a Link header with a single URL:
* <http://blog.com/?feed=rss2>; rel="alternate"; type="application/rss+xml"
* The header may contain several comma-separated links, and URLs may be relative.
*/
export function findHeaderLinks(
  response: TextResponse,
  type: string
): string[] {
  // `response` supplies the `Link` header and the base URL (`response.url`)
  // used to resolve relative targets. `type` is the media type a link must
  // declare to be returned. The result keeps the order of the header.
  let linkHeader = response.headers.get('Link')
  if (!linkHeader) return []

  let wantedType = type.toLowerCase()
  // Matches `; name`, `; name=token` and `; name="quoted value"` parameters.
  let paramPattern = /;\s*([^=;\s]+)\s*(?:=\s*(?:"([^"]*)"|([^;\s]*)))?/g
  let urls: string[] = []

  // Split only on commas that are followed by the `<` opening the next link
  // target, so commas inside a URL do not cut a link in half.
  for (let link of linkHeader.split(/,\s*(?=<)/)) {
    let target = link.match(/^\s*<([^>]*)>/)
    let url = target?.[1]
    if (!target || !url) continue

    // Parameter names are case-insensitive; the first occurrence wins.
    let params = new Map<string, string>()
    for (let [, name, quoted, token] of link
      .slice(target[0].length)
      .matchAll(paramPattern)) {
      let key = (name ?? '').toLowerCase()
      if (!params.has(key)) params.set(key, quoted ?? token ?? '')
    }

    // `rel` may hold several space-separated relation types.
    let rels = (params.get('rel') ?? '').toLowerCase().split(/\s+/)
    let linkType = (params.get('type') ?? '').trim().toLowerCase()
    if (!rels.includes('alternate') || linkType !== wantedType) continue

    if (/^https?:\/\//i.test(url)) {
      // Absolute http(s) URLs are returned exactly as written in the header.
      urls.push(url)
    } else {
      try {
        urls.push(new URL(url, response.url).href)
      } catch {
        // The header comes from a remote server: skip a target that cannot
        // be resolved instead of aborting the whole scan.
      }
    }
  }
  return urls
}

export function toTime(date: null | string | undefined): number | undefined {
if (!date) return undefined
let time = new Date(date).getTime() / 1000
Expand Down
4 changes: 4 additions & 0 deletions core/test/loader/atom.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,15 @@ test('finds atom links in <a> elements', () => {
</body>
</html>`,
{
headers: new Headers({
Link: '</news/feed>; rel="alternate"; type="application/atom+xml"'
}),
url: 'https://example.com/news'
}
)
),
[
'https://example.com/news/feed',
'https://example.com/news/atom',
'https://example.com/blog/feed.xml',
'https://example.com/something.atom',
Expand Down
4 changes: 4 additions & 0 deletions core/test/loader/json-feed.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,15 @@ test('detects links', () => {
</head>
</html>`,
{
headers: new Headers({
Link: '</news/json>; rel="alternate"; type="application/feed+json"'
}),
url: 'https://example.com/news/'
}
)
),
[
'https://example.com/news/json',
'https://example.com/a',
'https://example.com/news/b',
'https://example.com/c',
Expand Down
4 changes: 4 additions & 0 deletions core/test/loader/rss.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@ test('detects links', () => {
</head>
</html>`,
{
headers: new Headers({
Link: '</news/rss>; rel="alternate"; type="application/rss+xml"'
}),
url: 'https://example.com/news/'
}
)
),
[
'https://example.com/news/rss',
'https://example.com/a',
'https://example.com/news/b',
'https://example.com/c',
Expand Down
51 changes: 51 additions & 0 deletions core/test/loader/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { deepStrictEqual } from 'node:assert'
import { test } from 'node:test'

import { createTextResponse } from '../../download.js'
import { findHeaderLinks } from '../../loader/utils.js'

// Two absolute links in one comma-separated Link header are both returned,
// in header order.
test('returns urls from link http header', () => {
  let firstLink =
    '<https://one.example.com>; rel="alternate"; type="application/rss+xml"'
  let secondLink =
    ', <https://two.example.com>; rel="alternate"; type="application/rss+xml"'
  let response = createTextResponse(
    `<!DOCTYPE html><html><head></head></html>`,
    {
      headers: new Headers({ Link: firstLink + secondLink }),
      url: 'https://example.com'
    }
  )
  let found = findHeaderLinks(response, 'application/rss+xml')
  deepStrictEqual(found, ['https://one.example.com', 'https://two.example.com'])
})

// A target starting with "/" is resolved against the origin of the page URL.
test('handles root-relative urls in http header', () => {
  let headers = new Headers({
    Link: '</rss>; rel="alternate"; type="application/atom+xml"'
  })
  let response = createTextResponse(
    `<!DOCTYPE html><html><head></head></html>`,
    { headers, url: 'https://example.com/blog' }
  )
  let found = findHeaderLinks(response, 'application/atom+xml')
  deepStrictEqual(found, ['https://example.com/rss'])
})

// A "./"-relative target is resolved against the directory of the page URL.
test('handles relative urls in http header', () => {
  let headers = new Headers({
    Link: '<./rss>; rel="alternate"; type="application/atom+xml"'
  })
  let response = createTextResponse(
    `<!DOCTYPE html><html><head></head></html>`,
    { headers, url: 'https://example.com/blog/' }
  )
  let found = findHeaderLinks(response, 'application/atom+xml')
  deepStrictEqual(found, ['https://example.com/blog/rss'])
})
Loading