From 79fbe355572c41d825a8effe21dcf0a9f54ff004 Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Thu, 12 Jun 2025 17:21:02 +0200 Subject: [PATCH 1/9] Atom pagination support --- core/loader/atom.ts | 79 ++++++++++++++++++++-- core/loader/utils.ts | 2 +- core/test/loader/atom.test.ts | 123 +++++++++++++++++++++++++++++++++- 3 files changed, 195 insertions(+), 9 deletions(-) diff --git a/core/loader/atom.ts b/core/loader/atom.ts index 62a6eace..52648212 100644 --- a/core/loader/atom.ts +++ b/core/loader/atom.ts @@ -1,8 +1,9 @@ -import type { TextResponse } from '../download.ts' +import type { DownloadTask, TextResponse } from '../download.ts' import type { OriginPost } from '../post.ts' -import { createPostsList } from '../posts-list.ts' +import { createPostsList, type PostsListLoader } from '../posts-list.ts' import type { Loader } from './index.ts' import { + buildFullURL, findAnchorHrefs, findDocumentLinks, findHeaderLinks, @@ -36,6 +37,69 @@ function parsePosts(text: TextResponse): OriginPost[] { }) } +/** + * Returns next or previous pagination url from feed xml, if present. + * See "paged feeds" https://www.rfc-editor.org/rfc/rfc5005#section-3 + */ +function getPaginationUrl( + xmlResponse: TextResponse, + rel: 'first' | 'last' | 'next' | 'previous' +): string | undefined { + let document = xmlResponse.parseXml() + if (!document) return undefined + let nextPageLink = [...document.querySelectorAll('link')].find( + link => link.getAttribute('rel') === rel + ) + return nextPageLink ? buildFullURL(nextPageLink, xmlResponse.url) : undefined +} + +type PostsCursor = + | [OriginPost[], PostsListLoader | undefined] + | [undefined, PostsListLoader] + +/** + * If xml response is ready, returns a tuple of posts and possibly + * the loader of the next portion of posts, if xml contains a link to them. + * If xml response is not yet ready, returns the recursive loader of posts. + */ +function getPostsCursor( + task: DownloadTask, + feedUrl: string, + feedResponse: TextResponse | undefined +): PostsCursor { + if (!feedResponse) { + return [ + undefined, + async () => { + let response = await task.text(feedUrl) + let [posts, loader] = getPostsCursor(task, feedUrl, response) + return [posts, loader] as [OriginPost[], PostsListLoader | undefined] + } + ] + } + let nextPageUrl = getPaginationUrl(feedResponse, 'next') + let posts = parsePosts(feedResponse) + if (nextPageUrl) { + return [ + posts, + async () => { + let nextPageResponse = await task.text(nextPageUrl) + let [nextPosts, loader] = getPostsCursor( + task, + nextPageUrl, + nextPageResponse + ) as [OriginPost[], PostsListLoader | undefined] + return [nextPosts, loader] as [ + OriginPost[], + PostsListLoader | undefined + ] + } + ] + } else { + return [posts, undefined] + } +} + export const atom: Loader = { getMineLinksFromText(text) { let type = 'application/atom+xml' @@ -54,12 +118,13 @@ export const atom: Loader = { }, getPosts(task, url, text) { - if (text) { - return createPostsList(parsePosts(text), undefined) + let [posts, nextLoader] = getPostsCursor(task, url, text) + if (posts) { + return createPostsList(posts, nextLoader) + } else if (nextLoader) { + return createPostsList(undefined, nextLoader) } else { - return createPostsList(undefined, async () => { - return [parsePosts(await task.text(url)), undefined] - }) + return createPostsList([], undefined) } }, diff --git a/core/loader/utils.ts b/core/loader/utils.ts index cc5d29f4..e049bc67 100644 --- a/core/loader/utils.ts +++ b/core/loader/utils.ts @@ -14,7 +14,7 @@ export function isHTML(text: TextResponse): boolean { * the explicitly provided base URL, but also the base URL specified * in the document. */ -function buildFullURL( +export function buildFullURL( link: HTMLAnchorElement | HTMLLinkElement, baseUrl: string ): string { diff --git a/core/test/loader/atom.test.ts b/core/test/loader/atom.test.ts index 0d628ba9..b4d5b927 100644 --- a/core/test/loader/atom.test.ts +++ b/core/test/loader/atom.test.ts @@ -2,13 +2,15 @@ import '../dom-parser.ts' import { spyOn } from 'nanospy' import { deepStrictEqual, equal } from 'node:assert' -import { test } from 'node:test' +import { afterEach, beforeEach, test } from 'node:test' import { setTimeout } from 'node:timers/promises' import { + checkAndRemoveRequestMock, createDownloadTask, createTextResponse, loaders, + mockRequest, type TextResponse } from '../../index.ts' @@ -20,6 +22,14 @@ function exampleAtom(responseBody: string): TextResponse { }) } +beforeEach(() => { + mockRequest() +}) + +afterEach(() => { + checkAndRemoveRequestMock() +}) + test('detects xml:base attribute', () => { deepStrictEqual( loaders.atom.getMineLinksFromText( @@ -457,3 +467,114 @@ test('parses media', () => { } ) }) + +test('detects pagination with rel="next" link', () => { + let $store = loaders.atom.getPosts( + createDownloadTask(), + 'https://example.com/feed/', + exampleAtom( + ` + + + + ` + ) + ) + equal($store.get().hasNext, true) +}) + +test('detects when there is no pagination', () => { + let $store = loaders.atom.getPosts( + createDownloadTask(), + 'https://example.com/feed/', + exampleAtom( + ` + + ` + ) + ) + equal($store.get().hasNext, false) +}) + +test('loads first then second page', async () => { + let task = createDownloadTask() + + let callCount = 0 + let textSpy = spyOn(task, 'text', () => { + callCount++ + if (callCount === 1) { + // First page + return Promise.resolve( + exampleAtom( + ` + + + ` + ) + ) + } else { + // Second page + return Promise.resolve( + exampleAtom( + ` + + ` + ) + ) + } + }) + + let posts = loaders.atom.getPosts(task, 'https://example.com/feed') + await posts.next() + + deepStrictEqual(textSpy.calls, [ + ['https://example.com/feed'], + ['https://example.com/feed?page=2'] + ]) +}) + +test('has posts from both pages', async () => { + let task = createDownloadTask() + + let callCount = 0 + spyOn(task, 'text', () => { + callCount++ + if (callCount === 1) { + // First page + return Promise.resolve( + exampleAtom( + ` + + + + Post on page 1 + 1 + 2023-01-01T00:00:00Z + + ` + ) + ) + } else { + // Second page + return Promise.resolve( + exampleAtom( + ` + + + Post on page 2 + 2 + 2023-01-02T00:00:00Z + + ` + ) + ) + } + }) + + let posts = loaders.atom.getPosts(task, 'https://example.com/feed') + await posts.next() + + equal(posts.get().list.length, 2) + equal(posts.get().list[0]?.title, 'Post on page 1') + equal(posts.get().list[1]?.title, 'Post on page 2') +}) From 562ced3df0a1fd86faba96b70c69647dcf5b0a57 Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Thu, 12 Jun 2025 21:08:22 +0200 Subject: [PATCH 2/9] Coverage and CI test fix --- core/loader/atom.ts | 6 ++---- core/test/loader/atom.test.ts | 10 ++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/core/loader/atom.ts b/core/loader/atom.ts index 52648212..00c584a2 100644 --- a/core/loader/atom.ts +++ b/core/loader/atom.ts @@ -119,12 +119,10 @@ export const atom: Loader = { getPosts(task, url, text) { let [posts, nextLoader] = getPostsCursor(task, url, text) - if (posts) { - return createPostsList(posts, nextLoader) - } else if (nextLoader) { + if (!posts && nextLoader) { return createPostsList(undefined, nextLoader) } else { - return createPostsList([], undefined) + return createPostsList(posts || [], nextLoader) } }, diff --git a/core/test/loader/atom.test.ts b/core/test/loader/atom.test.ts index b4d5b927..1cb1a737 100644 --- a/core/test/loader/atom.test.ts +++ b/core/test/loader/atom.test.ts @@ -525,6 +525,11 @@ test('loads first then second page', async () => { }) let posts = loaders.atom.getPosts(task, 'https://example.com/feed') + + // Wait for first page to be loaded + await posts.loading + + // Then load next page await posts.next() deepStrictEqual(textSpy.calls, [ @@ -572,6 +577,11 @@ test('has posts from both pages', async () => { }) let posts = loaders.atom.getPosts(task, 'https://example.com/feed') + + // Wait for first page to be loaded + await posts.loading + + // Then load next page await posts.next() equal(posts.get().list.length, 2) From 28f32a64d7ef6f5fb48b81d148962b8c1569d027 Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Fri, 13 Jun 2025 07:32:16 +0200 Subject: [PATCH 3/9] Re-run workflows --- core/loader/atom.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/loader/atom.ts b/core/loader/atom.ts index 00c584a2..f784c0bd 100644 --- a/core/loader/atom.ts +++ b/core/loader/atom.ts @@ -119,11 +119,7 @@ export const atom: Loader = { getPosts(task, url, text) { let [posts, nextLoader] = getPostsCursor(task, url, text) - if (!posts && nextLoader) { - return createPostsList(undefined, nextLoader) - } else { - return createPostsList(posts || [], nextLoader) - } + return createPostsList(posts || [], nextLoader) }, getSuggestedLinksFromText(text) { From 6cffe287ffc3719e0759b469c0843b8a4f9fe7e5 Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Fri, 13 Jun 2025 07:33:34 +0200 Subject: [PATCH 4/9] Format --- core/test/loader/atom.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/test/loader/atom.test.ts b/core/test/loader/atom.test.ts index 1cb1a737..5524ca1a 100644 --- a/core/test/loader/atom.test.ts +++ b/core/test/loader/atom.test.ts @@ -525,10 +525,10 @@ test('loads first then second page', async () => { }) let posts = loaders.atom.getPosts(task, 'https://example.com/feed') - + // Wait for first page to be loaded await posts.loading - + // Then load next page await posts.next() @@ -577,10 +577,10 @@ test('has posts from both pages', async () => { }) let posts = loaders.atom.getPosts(task, 'https://example.com/feed') - + // Wait for first page to be loaded await posts.loading - + // Then load next page await posts.next() From 4b5a1376c43757bfc38e7e5c624988b1e423387c Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Fri, 13 Jun 2025 07:38:29 +0200 Subject: [PATCH 5/9] Coverage --- core/loader/atom.ts | 6 +++++- core/test/loader/atom.test.ts | 6 ------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/core/loader/atom.ts b/core/loader/atom.ts index f784c0bd..00c584a2 100644 --- a/core/loader/atom.ts +++ b/core/loader/atom.ts @@ -119,7 +119,11 @@ export const atom: Loader = { getPosts(task, url, text) { let [posts, nextLoader] = getPostsCursor(task, url, text) - return createPostsList(posts || [], nextLoader) + if (!posts && nextLoader) { + return createPostsList(undefined, nextLoader) + } else { + return createPostsList(posts || [], nextLoader) + } }, getSuggestedLinksFromText(text) { diff --git a/core/test/loader/atom.test.ts b/core/test/loader/atom.test.ts index 5524ca1a..23738f4b 100644 --- a/core/test/loader/atom.test.ts +++ b/core/test/loader/atom.test.ts @@ -526,10 +526,7 @@ test('loads first then second page', async () => { let posts = loaders.atom.getPosts(task, 'https://example.com/feed') - // Wait for first page to be loaded await posts.loading - - // Then load next page await posts.next() deepStrictEqual(textSpy.calls, [ @@ -578,10 +575,7 @@ test('has posts from both pages', async () => { let posts = loaders.atom.getPosts(task, 'https://example.com/feed') - // Wait for first page to be loaded await posts.loading - - // Then load next page await posts.next() equal(posts.get().list.length, 2) From ae57c98030bd01f76bb2560cbe41cab1d03f8d7f Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Fri, 13 Jun 2025 09:16:16 +0200 Subject: [PATCH 6/9] No typecast, use expectRequest, docs update --- core/README.md | 19 ++++++ core/loader/atom.ts | 9 +-- core/request.ts | 2 + core/test/loader/atom.test.ts | 110 +++++++++++++--------------------- 4 files changed, 67 insertions(+), 73 deletions(-) diff --git a/core/README.md b/core/README.md index 63938d0b..b5e54c8b 100644 --- a/core/README.md +++ b/core/README.md @@ -86,3 +86,22 @@ n bnt core/test/html.test.ts -t 'sanitizes HTML' In VS Code you can use [extension](https://marketplace.visualstudio.com/items?itemName=connor4312.nodejs-testing) to run specific test from UI. Open `core/coverage/lcov-report/index.html` to see coverage issues. + +## Mocking requests + +To enable network request mocking in tests, you have to set up and tear down request mock before and after each test: + +```typescript +beforeEach(() => { + mockRequest() +}) + +afterEach(() => { + checkAndRemoveRequestMock() +}) +``` + +In the test itself, before making or triggering the request itself, use either: + +- `expectRequest(url).andRespond(...)` for simple mocking where the response is known upfront. +- or `expectRequest(url).andWait(...)` for complex scenarios where you need to control test loading states or simulate network delays. diff --git a/core/loader/atom.ts b/core/loader/atom.ts index 00c584a2..98b4eb95 100644 --- a/core/loader/atom.ts +++ b/core/loader/atom.ts @@ -73,7 +73,7 @@ function getPostsCursor( async () => { let response = await task.text(feedUrl) let [posts, loader] = getPostsCursor(task, feedUrl, response) - return [posts, loader] as [OriginPost[], PostsListLoader | undefined] + return [posts || [], loader] } ] } @@ -88,11 +88,8 @@ function getPostsCursor( task, nextPageUrl, nextPageResponse - ) as [OriginPost[], PostsListLoader | undefined] - return [nextPosts, loader] as [ - OriginPost[], - PostsListLoader | undefined - ] + ) + return [nextPosts || [], loader] } ] } else { diff --git a/core/request.ts b/core/request.ts index b3465e7b..c3267c6d 100644 --- a/core/request.ts +++ b/core/request.ts @@ -86,11 +86,13 @@ export function expectRequest(url: string): RequestMock { } requestExpects.push(expect) return { + /** Immediately sets up a mock response that will be returned synchronously */ andRespond(status, body = '', contentType = 'text/html') { expect.contentType = contentType expect.status = status expect.response = body }, + /** Returns a function that allows more control over the response */ andWait() { let { promise, resolve } = Promise.withResolvers() expect.wait = promise diff --git a/core/test/loader/atom.test.ts b/core/test/loader/atom.test.ts index 23738f4b..6b61bd8a 100644 --- a/core/test/loader/atom.test.ts +++ b/core/test/loader/atom.test.ts @@ -9,6 +9,7 @@ import { checkAndRemoveRequestMock, createDownloadTask, createTextResponse, + expectRequest, loaders, mockRequest, type TextResponse @@ -499,83 +500,58 @@ test('detects when there is no pagination', () => { test('loads first then second page', async () => { let task = createDownloadTask() - let callCount = 0 - let textSpy = spyOn(task, 'text', () => { - callCount++ - if (callCount === 1) { - // First page - return Promise.resolve( - exampleAtom( - ` - - - ` - ) - ) - } else { - // Second page - return Promise.resolve( - exampleAtom( - ` - - ` - ) - ) - } - }) - + expectRequest('https://example.com/feed').andRespond( + 200, + ` + + + `, + 'application/atom+xml' + ) let posts = loaders.atom.getPosts(task, 'https://example.com/feed') - await posts.loading - await posts.next() - deepStrictEqual(textSpy.calls, [ - ['https://example.com/feed'], - ['https://example.com/feed?page=2'] - ]) + expectRequest('https://example.com/feed?page=2').andRespond( + 200, + ` + + `, + 'application/atom+xml' + ) + await posts.next() }) test('has posts from both pages', async () => { let task = createDownloadTask() - let callCount = 0 - spyOn(task, 'text', () => { - callCount++ - if (callCount === 1) { - // First page - return Promise.resolve( - exampleAtom( - ` - - - - Post on page 1 - 1 - 2023-01-01T00:00:00Z - - ` - ) - ) - } else { - // Second page - return Promise.resolve( - exampleAtom( - ` - - - Post on page 2 - 2 - 2023-01-02T00:00:00Z - - ` - ) - ) - } - }) - + expectRequest('https://example.com/feed').andRespond( + 200, + ` + + + + Post on page 1 + 1 + 2023-01-01T00:00:00Z + + `, + 'application/atom+xml' + ) let posts = loaders.atom.getPosts(task, 'https://example.com/feed') - await posts.loading + + expectRequest('https://example.com/feed?page=2').andRespond( + 200, + ` + + + Post on page 2 + 2 + 2023-01-02T00:00:00Z + + `, + 'application/atom+xml' + ) await posts.next() equal(posts.get().list.length, 2) From 8561a718ecefccb8857fcf45e0ecea98ba10d93f Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Fri, 13 Jun 2025 12:22:15 +0200 Subject: [PATCH 7/9] Readme fix --- core/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/README.md b/core/README.md index b5e54c8b..6dde8286 100644 --- a/core/README.md +++ b/core/README.md @@ -7,6 +7,7 @@ _See the [full architecture guide](../README.md) first._ - [Client Environments](#client-environments) - [URL Routing](#url-routing) - [Test Strategy](#test-strategy) + - [Mocking Requests](#mocking-requests) ## Project Structure @@ -87,7 +88,7 @@ In VS Code you can use [extension](https://marketplace.visualstudio.com/items?it Open `core/coverage/lcov-report/index.html` to see coverage issues. -## Mocking requests +### Mocking Requests To enable network request mocking in tests, you have to set up and tear down request mock before and after each test: From fd03e54568e0abf49098f5be2f9b2ad953ea7bd5 Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Fri, 13 Jun 2025 12:28:19 +0200 Subject: [PATCH 8/9] Update core/README.md Co-authored-by: Andrey Sitnik --- core/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/README.md b/core/README.md index 6dde8286..dd714e8e 100644 --- a/core/README.md +++ b/core/README.md @@ -7,7 +7,7 @@ _See the [full architecture guide](../README.md) first._ - [Client Environments](#client-environments) - [URL Routing](#url-routing) - [Test Strategy](#test-strategy) - - [Mocking Requests](#mocking-requests) +- [Mocking Requests](#mocking-requests) ## Project Structure From af5d1907bc27c003717e40da78b4407210d6eab2 Mon Sep 17 00:00:00 2001 From: Anna Shishlyakova Date: Fri, 13 Jun 2025 12:34:15 +0200 Subject: [PATCH 9/9] Markdown fix --- core/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/README.md b/core/README.md index dd714e8e..5ebb815f 100644 --- a/core/README.md +++ b/core/README.md @@ -88,7 +88,7 @@ In VS Code you can use [extension](https://marketplace.visualstudio.com/items?it Open `core/coverage/lcov-report/index.html` to see coverage issues. -### Mocking Requests +## Mocking Requests To enable network request mocking in tests, you have to set up and tear down request mock before and after each test: