diff --git a/README.md b/README.md index 48573214a..316e53af9 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,8 @@ console.log(stations[0]); #### Searching for stations +##### Geographic search + You can search for stations by proximity using the `near` and `nearest` functions: ```typescript @@ -69,6 +71,41 @@ Both functions take the following parameters: - `maxDistance`: Maximum distance in kilometers to search for stations (default: `50` km). - `maxResults`: Maximum number of results to return (default: `10`). +##### Full-text search + +You can search for stations by name, region, country, continent, or source ID using the `search` function. It supports fuzzy matching and prefix search: + +```typescript +import { search } from "@neaps/tide-database"; + +// Search for stations by name with fuzzy matching +const results = search("Boston"); +console.log("Found:", results.length, "stations"); +console.log(results[0].name); + +// Search with a filter function +const usStations = search("harbor", { + filter: (station) => station.country === "United States", + maxResults: 10, +}); +console.log("US harbor stations:", usStations); + +// Combine multiple filters +const referenceStations = search("island", { + filter: (station) => + station.type === "reference" && station.continent === "Americas", + maxResults: 20, +}); +console.log("Reference stations:", referenceStations); +``` + +The `search` function takes the following parameters: + +- `query` (required): Search string. Supports fuzzy matching and prefix search. +- `options` (optional): + - `filter`: Function that takes a station and returns `true` to include it in results, or `false` to exclude it. + - `maxResults`: Maximum number of results to return (default: `20`). + ## Data Format Each tide station is defined in a single JSON file in the [`data/`](./data) directory that includes basic station information, like location and name, and harmonics or subordinate station offsets. 
The format is defined by the schema in [../schemas/station.schema.json](schemas/station.schema.json), which includes more detailed descriptions of each field. All data is validated against this schema automatically on each change. diff --git a/package.json b/package.json index e6aa95c63..9ac2a5938 100644 --- a/package.json +++ b/package.json @@ -62,6 +62,7 @@ ], "dependencies": { "geokdbush": "^2.0.1", - "kdbush": "^4.0.2" + "kdbush": "^4.0.2", + "minisearch": "^7.2.0" } } diff --git a/src/index.ts b/src/index.ts index f2b501c93..e73ad32eb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,4 @@ export * from "./constituents.js"; export * from "./stations.js"; -export * from "./search.js"; +export * from "./search/index.js"; export type * from "./types.js"; diff --git a/src/search-index.ts b/src/search/geo.ts similarity index 75% rename from src/search-index.ts rename to src/search/geo.ts index f8f7d681f..21997191d 100644 --- a/src/search-index.ts +++ b/src/search/geo.ts @@ -4,10 +4,10 @@ import KDBush from "kdbush"; * Create a search index for stations and return it as a base64 string, which can be * inlinted at build time by using the `macro` import type: * - * import { createIndex } from "./search-index.js" with { type: "macro" }; + * import { createGeoIndex } from "./geo.js" with { type: "macro" }; */ -export async function createIndex() { - const { stations } = await import("./stations.js"); +export async function createGeoIndex() { + const { stations } = await import("../stations.js"); const index = new KDBush(stations.length); @@ -20,7 +20,7 @@ export async function createIndex() { return Buffer.from(index.data).toString("base64"); } -export function loadIndex(data: string): KDBush { +export function loadGeoIndex(data: string): KDBush { return KDBush.from(base64ToArrayBuffer(data)); } diff --git a/src/search.ts b/src/search/index.ts similarity index 55% rename from src/search.ts rename to src/search/index.ts index 19998995e..be7e2d2bf 100644 --- 
a/src/search.ts +++ b/src/search/index.ts @@ -1,8 +1,10 @@ import { around, distance } from "geokdbush"; -import { stations } from "./stations.js"; -import { createIndex } from "./search-index.js" with { type: "macro" }; -import { loadIndex } from "./search-index.js"; -import type { Station } from "./types.js"; +import { stations } from "../stations.js"; +import { createGeoIndex } from "./geo.js" with { type: "macro" }; +import { loadGeoIndex } from "./geo.js"; +import { createTextIndex } from "./text.js" with { type: "macro" }; +import { loadTextIndex } from "./text.js"; +import type { Station } from "../types.js"; export type Position = Latitude & Longitude; type Latitude = { latitude: number } | { lat: number }; @@ -17,13 +19,19 @@ export type NearOptions = NearestOptions & { maxResults?: number; }; +export type TextSearchOptions = { + filter?: (station: Station) => boolean; + maxResults?: number; +}; + /** * A tuple of a station and its distance from a given point, in kilometers. */ export type StationWithDistance = [Station, number]; -// Load the index, which gets inlined at build time -const index = loadIndex(await createIndex()); +// Load the indexes, which get inlined at build time +const geoIndex = loadGeoIndex(await createGeoIndex()); +const textIndex = loadTextIndex(await createTextIndex()); /** * Find stations near a given position. @@ -37,7 +45,7 @@ export function near({ const point = positionToPoint(position); const ids: number[] = around( - index, + geoIndex, ...point, maxResults, maxDistance, @@ -68,3 +76,30 @@ export function positionToPoint(options: Position): [number, number] { const latitude = "latitude" in options ? options.latitude : options.lat; return [longitude, latitude]; } + +const stationMap = new Map(stations.map((s) => [s.id, s])); + +/** + * Search for stations by text across name, region, country, continent, and source ID. + * Supports fuzzy matching and prefix search. 
+ */ +export function search( + query: string, + { filter, maxResults = 20 }: TextSearchOptions = {}, +): Station[] { + const searchOptions: Parameters<typeof textIndex.search>[1] = {}; + + if (filter) { + searchOptions.filter = (result) => { + const station = stationMap.get(result.id); + return station ? filter(station) : false; + }; + } + + const results = textIndex.search(query, searchOptions); + + return results + .slice(0, maxResults) + .map((result) => stationMap.get(result.id)!) + .filter(Boolean); +} diff --git a/src/search/text.ts b/src/search/text.ts new file mode 100644 index 000000000..ef90cadf2 --- /dev/null +++ b/src/search/text.ts @@ -0,0 +1,39 @@ +import MiniSearch, { type Options } from "minisearch"; +import type { Station } from "../types.js"; + +const textSearchIndexOptions: Options<Station> = { + fields: ["name", "region", "country", "continent", "source.id"], + extractField: (station, fieldName) => { + if (fieldName in station) { + return (station as any)[fieldName]; + } else if (fieldName === "source.id") { + return station.source.id; + } + }, + searchOptions: { + boost: { + name: 3, + }, + fuzzy: 0.2, + prefix: true, + }, +}; + +/** + * Create a text search index for stations and return it as a JSON string, which can be + * inlined at build time by using the `macro` import type: + * + * import { createTextIndex } from "./text.js" with { type: "macro" }; + */ +export async function createTextIndex() { + const { stations } = await import("../stations.js"); + + const index = new MiniSearch(textSearchIndexOptions); + index.addAll(stations); + + return JSON.stringify(index.toJSON()); +} + +export function loadTextIndex(data: string): MiniSearch { + return MiniSearch.loadJSON(data, textSearchIndexOptions); +} diff --git a/test/search.test.ts b/test/search.test.ts index 61101dad9..0c9fd7dd2 100644 --- a/test/search.test.ts +++ b/test/search.test.ts @@ -1,5 +1,5 @@ import { describe, test, expect } from "vitest"; -import { near, nearest } from "../src/index.js"; +import 
{ near, nearest, search } from "../src/index.js"; describe("near", () => { [ @@ -67,3 +67,77 @@ describe("nearest", () => { expect(nearest({ lon: 0, lat: 0, maxDistance: 1 })).toBe(null); }); }); + +describe("search", () => { + test("searches by name", () => { + const results = search("Boston"); + expect(results.length).toBeGreaterThan(0); + expect(results[0]!.name.toUpperCase()).toContain("BOSTON"); + }); + + test("searches by source id", () => { + const results = search("9414290"); + expect(results.length).toBeGreaterThan(0); + expect(results[0]!.source.id).toBe("9414290"); + }); + + test("searches by region", () => { + const results = search("HI"); + expect(results.length).toBeGreaterThan(0); + const hasHawaiiStation = results.some((s) => s.region === "HI"); + expect(hasHawaiiStation).toBe(true); + }); + + test("searches by country", () => { + const results = search("Canada"); + expect(results.length).toBeGreaterThan(0); + const hasCanadianStation = results.some((s) => s.country === "Canada"); + expect(hasCanadianStation).toBe(true); + }); + + test("searches by continent", () => { + const results = search("Europe"); + expect(results.length).toBeGreaterThan(0); + results.forEach((station) => { + expect(station.continent).toBe("Europe"); + }); + }); + + test("supports fuzzy matching", () => { + const results = search("Bosten"); // Misspelled Boston + expect(results.length).toBeGreaterThan(0); + expect(results[0]!.name.toUpperCase()).toContain("BOSTON"); + }); + + test("supports prefix search", () => { + const results = search("San"); + expect(results.length).toBeGreaterThan(0); + const hasSanFrancisco = results.some((s) => s.name.includes("San")); + expect(hasSanFrancisco).toBe(true); + }); + + test("combines query with filters", () => { + const results = search("Harbor", { + filter: (station) => + station.type === "reference" && + station.country === "United States" && + station.continent === "Americas", + }); + expect(results.length).toBeGreaterThan(0); + 
results.forEach((station) => { + expect(station.type).toBe("reference"); + expect(station.country).toBe("United States"); + expect(station.continent).toBe("Americas"); + }); + }); + + test("respects maxResults", () => { + const results = search("Harbor", { maxResults: 5 }); + expect(results.length).toBeLessThanOrEqual(5); + }); + + test("defaults maxResults to 20", () => { + const results = search("Bay"); + expect(results.length).toBeLessThanOrEqual(20); + }); +});