Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ _See the [full architecture guide](../README.md) first._
- [Client Environments](#client-environments)
- [URL Routing](#url-routing)
- [Test Strategy](#test-strategy)
- [Mocking Requests](#mocking-requests)

## Project Structure

Expand Down Expand Up @@ -86,3 +87,22 @@ n bnt core/test/html.test.ts -t 'sanitizes HTML'
In VS Code, you can use [this extension](https://marketplace.visualstudio.com/items?itemName=connor4312.nodejs-testing) to run a specific test from the UI.

Open `core/coverage/lcov-report/index.html` to see coverage issues.

## Mocking Requests

To mock network requests in tests, set up the request mock before each test and check and remove it after each test:

```typescript
beforeEach(() => {
mockRequest()
})

afterEach(() => {
checkAndRemoveRequestMock()
})
```

Inside the test, before making or triggering the request, use either:

- `expectRequest(url).andRespond(...)` for simple mocking where the response is known upfront.
- or `expectRequest(url).andWait(...)` for complex scenarios where you need to control test loading states or simulate network delays.
74 changes: 67 additions & 7 deletions core/loader/atom.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import type { TextResponse } from '../download.ts'
import type { DownloadTask, TextResponse } from '../download.ts'
import type { OriginPost } from '../post.ts'
import { createPostsList } from '../posts-list.ts'
import { createPostsList, type PostsListLoader } from '../posts-list.ts'
import type { Loader } from './index.ts'
import {
buildFullURL,
findAnchorHrefs,
findDocumentLinks,
findHeaderLinks,
Expand Down Expand Up @@ -36,6 +37,66 @@ function parsePosts(text: TextResponse): OriginPost[] {
})
}

/**
 * Finds a pagination link of the requested kind in a feed XML response.
 * See "paged feeds" https://www.rfc-editor.org/rfc/rfc5005#section-3
 *
 * Walks the `<link>` elements of the parsed document in order and resolves
 * the first one whose `rel` attribute equals `rel` with `buildFullURL`,
 * passing the response URL as the base.
 *
 * @param xmlResponse Downloaded feed response to inspect.
 * @param rel Link relation to look for.
 * @returns The resolved URL, or `undefined` when the response could not be
 *          parsed as XML or has no matching link.
 */
function getPaginationUrl(
  xmlResponse: TextResponse,
  rel: 'first' | 'last' | 'next' | 'previous'
): string | undefined {
  let xml = xmlResponse.parseXml()
  if (!xml) return undefined
  for (let link of xml.querySelectorAll('link')) {
    if (link.getAttribute('rel') === rel) {
      return buildFullURL(link, xmlResponse.url)
    }
  }
  return undefined
}

/**
 * Return value of `getPostsCursor()`, as a `[posts, loader]` tuple:
 * - posts are already known, and the loader (if any) fetches more of them;
 * - or posts are not known yet, and the loader is required to fetch them.
 */
type PostsCursor =
| [OriginPost[], PostsListLoader | undefined]
| [undefined, PostsListLoader]

/**
 * Builds the `[posts, loader]` cursor for a feed.
 *
 * - Without a response, no posts are returned and the loader downloads
 *   `feedUrl` itself.
 * - With a response, its posts are parsed immediately; a loader is returned
 *   only when the XML has a `rel="next"` pagination link, and that loader
 *   downloads the linked page.
 *
 * Every loader goes through this function again for the page it downloaded,
 * so pagination continues for as long as pages keep linking to a next one.
 *
 * @param task Download task used to fetch feed pages.
 * @param feedUrl URL of the feed to load when `feedResponse` is missing.
 * @param feedResponse Already downloaded feed, if any.
 */
function getPostsCursor(
  task: DownloadTask,
  feedUrl: string,
  feedResponse: TextResponse | undefined
): PostsCursor {
  // Shared by both branches: download a page, then unwrap its own cursor.
  let loaderFor = (pageUrl: string): PostsListLoader => {
    return async () => {
      let pageResponse = await task.text(pageUrl)
      let [pagePosts, followingLoader] = getPostsCursor(
        task,
        pageUrl,
        pageResponse
      )
      return [pagePosts || [], followingLoader]
    }
  }

  if (!feedResponse) return [undefined, loaderFor(feedUrl)]

  let nextPageUrl = getPaginationUrl(feedResponse, 'next')
  let posts = parsePosts(feedResponse)
  if (!nextPageUrl) return [posts, undefined]
  return [posts, loaderFor(nextPageUrl)]
}

export const atom: Loader = {
getMineLinksFromText(text) {
let type = 'application/atom+xml'
Expand All @@ -54,12 +115,11 @@ export const atom: Loader = {
},

getPosts(task, url, text) {
if (text) {
return createPostsList(parsePosts(text), undefined)
let [posts, nextLoader] = getPostsCursor(task, url, text)
if (!posts && nextLoader) {
return createPostsList(undefined, nextLoader)
} else {
return createPostsList(undefined, async () => {
return [parsePosts(await task.text(url)), undefined]
})
return createPostsList(posts || [], nextLoader)
}
},

Expand Down
2 changes: 1 addition & 1 deletion core/loader/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export function isHTML(text: TextResponse): boolean {
* the explicitly provided base URL, but also the base URL specified
* in the document.
*/
function buildFullURL(
export function buildFullURL(
link: HTMLAnchorElement | HTMLLinkElement,
baseUrl: string
): string {
Expand Down
2 changes: 2 additions & 0 deletions core/request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,13 @@ export function expectRequest(url: string): RequestMock {
}
requestExpects.push(expect)
return {
/** Immediately sets up a mock response that will be returned synchronously */
andRespond(status, body = '', contentType = 'text/html') {
expect.contentType = contentType
expect.status = status
expect.response = body
},
/** Returns a function that allows more control over the response */
andWait() {
let { promise, resolve } = Promise.withResolvers<void>()
expect.wait = promise
Expand Down
103 changes: 102 additions & 1 deletion core/test/loader/atom.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ import '../dom-parser.ts'

import { spyOn } from 'nanospy'
import { deepStrictEqual, equal } from 'node:assert'
import { test } from 'node:test'
import { afterEach, beforeEach, test } from 'node:test'
import { setTimeout } from 'node:timers/promises'

import {
checkAndRemoveRequestMock,
createDownloadTask,
createTextResponse,
expectRequest,
loaders,
mockRequest,
type TextResponse
} from '../../index.ts'

Expand All @@ -20,6 +23,14 @@ function exampleAtom(responseBody: string): TextResponse {
})
}

beforeEach(() => {
mockRequest()
})

afterEach(() => {
checkAndRemoveRequestMock()
})

test('detects xml:base attribute', () => {
deepStrictEqual(
loaders.atom.getMineLinksFromText(
Expand Down Expand Up @@ -457,3 +468,93 @@ test('parses media', () => {
}
)
})

// A `rel="next"` link in an already downloaded feed must make the posts list
// report that another page is available.
test('detects pagination with rel="next" link', () => {
  let task = createDownloadTask()
  let pagedFeed = exampleAtom(
    `<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="https://example.com/feeds/posts.atom"/>
<link rel="next" href="https://example.com/feeds/posts-2024.atom"/>
</feed>`
  )
  let $posts = loaders.atom.getPosts(
    task,
    'https://example.com/feed/',
    pagedFeed
  )
  let { hasNext } = $posts.get()
  equal(hasNext, true)
})

// A feed without any pagination link must not advertise a next page.
test('detects when there is no pagination', () => {
  let task = createDownloadTask()
  let singlePageFeed = exampleAtom(
    `<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
</feed>`
  )
  let $posts = loaders.atom.getPosts(
    task,
    'https://example.com/feed/',
    singlePageFeed
  )
  let { hasNext } = $posts.get()
  equal(hasNext, false)
})

// Checks the order of network requests when no feed text is passed in:
// the feed URL is requested first, and the `rel="next"` URL only after
// `posts.next()` is called.
// There are no explicit assertions here.
// NOTE(review): presumably `checkAndRemoveRequestMock()` in `afterEach` fails
// the test when an expected request was not made — confirm in core/request.ts.
test('loads first then second page', async () => {
let task = createDownloadTask()

// First page: contains only a link to the second page.
expectRequest('https://example.com/feed').andRespond(
200,
`<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="next" href="https://example.com/feed?page=2" />
</feed>`,
'application/atom+xml'
)
let posts = loaders.atom.getPosts(task, 'https://example.com/feed')
await posts.loading

// Second page: registered only after the first page has finished loading.
expectRequest('https://example.com/feed?page=2').andRespond(
200,
`<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
</feed>`,
'application/atom+xml'
)
await posts.next()
})

// Checks that posts from the first page and from the page behind its
// `rel="next"` link end up in one list, in page order.
test('has posts from both pages', async () => {
let task = createDownloadTask()

// First page: one entry plus a link to the second page.
expectRequest('https://example.com/feed').andRespond(
200,
`<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="next" href="https://example.com/feed?page=2" />
<entry>
<title>Post on page 1</title>
<id>1</id>
<published>2023-01-01T00:00:00Z</published>
</entry>
</feed>`,
'application/atom+xml'
)
let posts = loaders.atom.getPosts(task, 'https://example.com/feed')
await posts.loading

// Second page: one entry and no further pagination link.
expectRequest('https://example.com/feed?page=2').andRespond(
200,
`<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title>Post on page 2</title>
<id>2</id>
<published>2023-01-02T00:00:00Z</published>
</entry>
</feed>`,
'application/atom+xml'
)
await posts.next()

// The list holds the page 1 post followed by the page 2 post.
equal(posts.get().list.length, 2)
equal(posts.get().list[0]?.title, 'Post on page 1')
equal(posts.get().list[1]?.title, 'Post on page 2')
})