Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ NODE_ENV=development
MONGODB_URI=mongodb://mongodb:27017/bt?replicaSet=rs0
REDIS_URI=redis://redis:6379
BACKEND_URL=http://backend:8080
SEMANTIC_SEARCH_URL=http://semantic-search:8000
TZ=America/Los_Angeles # for tslog

SIS_CLASS_APP_ID=_
Expand Down
2 changes: 1 addition & 1 deletion apps/backend/scripts/prepare-typedefs.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const typedefFiles = fs.readdirSync(sourceDir)
.sort();

// Get all module directories from backend/src/modules (excluding non-module directories)
const excludedDirs = ['cache', 'generated-types'];
const excludedDirs = ['cache', 'generated-types', 'semantic-search'];
const moduleDirs = fs.readdirSync(modulesDir, { withFileTypes: true })
.filter(dirent => dirent.isDirectory() && !excludedDirs.includes(dirent.name))
.map(dirent => dirent.name)
Expand Down
4 changes: 4 additions & 0 deletions apps/backend/src/bootstrap/loaders/express.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import helmet from "helmet";
import { RedisClientType } from "redis";

import { config } from "../../config";
import semanticSearchRoutes from "../../modules/semantic-search/routes";
import passportLoader from "./passport";

export default async (
Expand Down Expand Up @@ -58,6 +59,9 @@ export default async (
// load authentication
passportLoader(app, redis);

// load semantic search routes
app.use("/semantic-search", semanticSearchRoutes);

app.use(
config.graphqlPath,
expressMiddleware(server, {
Expand Down
6 changes: 6 additions & 0 deletions apps/backend/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ export interface Config {
backendPath: string;
graphqlPath: string;
isDev: boolean;
semanticSearch: {
url: string;
};
mongoDB: {
uri: string;
};
Expand All @@ -45,6 +48,9 @@ export const config: Config = {
backendPath: env("BACKEND_PATH"),
graphqlPath: env("GRAPHQL_PATH"),
isDev: env("NODE_ENV") === "development",
semanticSearch: {
url: env("SEMANTIC_SEARCH_URL"),
},
mongoDB: {
uri: env("MONGODB_URI"),
},
Expand Down
67 changes: 67 additions & 0 deletions apps/backend/src/modules/semantic-search/client.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { config } from "../../config";

// One entry of `SemanticSearchResponse.results`.
// NOTE(review): field meanings are inferred from names only; confirm against
// the semantic search service's response schema.
interface SemanticSearchResult {
subject: string;
courseNumber: string;
title: string;
description: string;
// Presumably the similarity score compared against `threshold` — confirm.
score: number;
text: string;
}

// Shape that `searchSemantic` resolves to: either the parsed JSON body of the
// service's `/search` endpoint (cast, not validated at runtime) or the locally
// built empty fallback.
interface SemanticSearchResponse {
query: string;
threshold: number;
count: number;
year: number;
semester: string;
// The local fallback sets this to null when no subject filter was supplied.
allowed_subjects: string[] | null;
// The local fallback fills this with the current time as an ISO-8601 string.
last_refreshed: string;
results: SemanticSearchResult[];
}

/**
 * Calls the semantic search service's `/search` endpoint.
 *
 * This function does not reject on service failure: a network error, a
 * non-2xx status, or a body that fails to parse is logged, and an empty
 * result set echoing the request parameters is returned instead.
 *
 * @param query - Search text, sent as the `query` parameter.
 * @param year - Sent (stringified) as the `year` parameter.
 * @param semester - Sent as the `semester` parameter.
 * @param allowedSubjects - Optional list; each entry is sent as a repeated
 *   `allowed_subjects` parameter.
 * @param threshold - Sent (stringified) as the `threshold` parameter;
 *   defaults to 0.3.
 * @returns The service's JSON body (cast, not validated), or the empty
 *   fallback described above.
 */
export async function searchSemantic(
  query: string,
  year: number,
  semester: string,
  allowedSubjects?: string[],
  threshold: number = 0.3
): Promise<SemanticSearchResponse> {
  const searchParams = new URLSearchParams();
  searchParams.set("query", query);
  searchParams.set("threshold", String(threshold));
  searchParams.set("year", String(year));
  searchParams.set("semester", semester);

  // A missing or empty subject list adds no `allowed_subjects` parameters.
  allowedSubjects?.forEach((subject) => {
    searchParams.append("allowed_subjects", subject);
  });

  const endpoint = `${config.semanticSearch.url}/search?${searchParams.toString()}`;

  try {
    const response = await fetch(endpoint);

    if (response.ok) {
      // Awaited inside the try so a malformed body also takes the fallback path.
      return (await response.json()) as SemanticSearchResponse;
    }

    throw new Error(`Semantic search failed: ${response.statusText}`);
  } catch (error) {
    console.error("Semantic search error:", error);

    // Graceful fallback: report zero matches rather than propagating the error.
    const emptyResponse: SemanticSearchResponse = {
      query,
      threshold,
      count: 0,
      year,
      semester,
      allowed_subjects: allowedSubjects || null,
      last_refreshed: new Date().toISOString(),
      results: [],
    };
    return emptyResponse;
  }
}
50 changes: 50 additions & 0 deletions apps/backend/src/modules/semantic-search/controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { Request, Response } from "express";

import { searchSemantic } from "./client";

/**
 * Lightweight semantic search endpoint that only returns course identifiers
 * (subject, course number and score). The frontend uses these to filter the
 * already-loaded catalog.
 *
 * Query parameters:
 * - `query` (required): non-empty search string.
 * - `year` (required): term year; must parse as an integer.
 * - `semester` (required): non-empty term semester.
 * - `threshold` (optional): finite number; defaults to 0.3 when absent or empty.
 *
 * Responds with 400 when a parameter is missing or malformed, 200 with
 * `{ query, threshold, results, count }` on success, and 500 if the search
 * call throws.
 */
export async function searchCourses(req: Request, res: Response) {
  const defaultThreshold = 0.3;
  const { query, year, semester, threshold } = req.query;

  if (!query || typeof query !== "string") {
    return res.status(400).json({ error: "query parameter is required" });
  }

  // Express can parse a query value into an array or nested object, so only a
  // plain string is accepted. Without this check a missing or non-numeric
  // year would be forwarded upstream as the text "undefined" or "NaN".
  const yearNum =
    typeof year === "string" ? Number.parseInt(year, 10) : Number.NaN;
  if (!Number.isInteger(yearNum)) {
    return res
      .status(400)
      .json({ error: "year parameter is required and must be an integer" });
  }

  if (typeof semester !== "string" || semester.length === 0) {
    return res.status(400).json({ error: "semester parameter is required" });
  }

  let thresholdNum = defaultThreshold;
  if (threshold !== undefined && threshold !== "") {
    thresholdNum =
      typeof threshold === "string"
        ? Number.parseFloat(threshold)
        : Number.NaN;
    if (!Number.isFinite(thresholdNum)) {
      return res
        .status(400)
        .json({ error: "threshold parameter must be a number" });
    }
  }

  try {
    const response = await searchSemantic(
      query,
      yearNum,
      semester,
      undefined,
      thresholdNum
    );

    // Return lightweight response: only subject + courseNumber + score
    const courseIds = response.results.map((r) => ({
      subject: r.subject,
      courseNumber: r.courseNumber,
      score: r.score,
    }));

    return res.json({
      query,
      threshold: thresholdNum,
      results: courseIds,
      count: courseIds.length,
    });
  } catch (error) {
    console.error("Semantic search error:", error);
    return res.status(500).json({
      error: "Semantic search failed",
      results: [],
      count: 0,
    });
  }
}
113 changes: 113 additions & 0 deletions apps/backend/src/modules/semantic-search/routes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import { type Response, Router } from "express";
import type { ParsedQs } from "qs";
import { RequestInit, fetch } from "undici";

import { config } from "../../config";
import { searchCourses } from "./controller";

// Router exported as this module's default; the route paths registered on it
// are relative to wherever the application mounts it.
const router = Router();
// Base URL of the semantic search service with one trailing slash removed, so
// that `${baseUrl}/path` never contains a double slash.
const baseUrl = config.semanticSearch.url.replace(/\/$/, "");

/** A single value as found on Express's parsed `req.query`. */
type QueryValue = string | ParsedQs | Array<string | ParsedQs> | undefined;

/**
 * Extracts the first non-empty string from a parsed query value.
 *
 * Strings are returned as-is (an empty string counts as absent), arrays are
 * searched depth-first in order, and nested objects are ignored.
 *
 * @returns The first non-empty string found, or `undefined` if there is none.
 */
const asString = (value: QueryValue): string | undefined => {
  if (typeof value === "string") {
    return value.length > 0 ? value : undefined;
  }

  if (!Array.isArray(value)) {
    // undefined, null, or a nested query object: nothing usable.
    return undefined;
  }

  for (const entry of value) {
    const candidate = asString(entry as QueryValue);
    if (candidate !== undefined) {
      return candidate;
    }
  }

  return undefined;
};

/**
 * Flattens a parsed query value into a list of its non-empty strings.
 *
 * Arrays are flattened recursively in order; empty strings, nested objects
 * and missing values contribute nothing.
 *
 * @returns Every non-empty string found, or an empty array if there is none.
 */
const toStringList = (value: QueryValue): string[] => {
  if (Array.isArray(value)) {
    return value.flatMap((entry) => toStringList(entry as QueryValue));
  }

  if (typeof value === "string" && value !== "") {
    return [value];
  }

  return [];
};

/**
 * Sends a request to `target` and relays the upstream status and body on `res`.
 *
 * A body whose content type contains `application/json` is parsed and re-sent
 * as JSON (an empty body becomes `{}`); anything else is relayed as text. If
 * the request or the JSON parsing throws, the client receives a 502 whose
 * `details` field carries the stringified error.
 *
 * @param target - Absolute URL to request.
 * @param init - Request options passed to `fetch`.
 * @param res - Express response to write the relayed result to.
 */
async function forward(
  target: string,
  init: RequestInit,
  res: Response
): Promise<void> {
  try {
    const upstream = await fetch(target, init);
    const contentType = upstream.headers.get("content-type") ?? "";
    const bodyText = await upstream.text();

    if (!contentType.includes("application/json")) {
      res.status(upstream.status).send(bodyText);
      return;
    }

    const payload = bodyText ? JSON.parse(bodyText) : {};
    res.status(upstream.status).json(payload);
  } catch (error) {
    console.error("Semantic search proxy error:", error);
    res.status(502).json({
      error: "Unable to reach semantic search service",
      details: String(error),
    });
  }
}

// Health probe: relays the upstream service's /health response.
router.get("/health", async (_req, res) => {
  const healthUrl = `${baseUrl}/health`;
  await forward(healthUrl, { method: "GET" }, res);
});

// Index refresh: relays the request body (or {} when absent) as JSON to the
// upstream /refresh endpoint.
// NOTE(review): no authentication is visible on this route in this file;
// confirm it is protected where the router is mounted.
router.post("/refresh", async (req, res) => {
  const refreshInit: RequestInit = {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(req.body ?? {}),
  };
  await forward(`${baseUrl}/refresh`, refreshInit, res);
});

// Lightweight endpoint: returns only course identifiers for frontend filtering
router.get("/courses", searchCourses);

// Full proxy endpoint (kept for backwards compatibility): relays `query`,
// `top_k`, `year`, `semester` and every `allowed_subjects` value to the
// upstream /search endpoint and returns its response unchanged.
router.get("/search", async (req, res) => {
  const query = asString(req.query.query);
  if (!query?.trim()) {
    res.status(400).json({ error: "query parameter is required" });
    return;
  }

  const upstreamParams = new URLSearchParams({ query });

  // Optional single-valued parameters are forwarded only when present.
  for (const name of ["top_k", "year", "semester"] as const) {
    const single = asString(req.query[name]);
    if (single) {
      upstreamParams.set(name, single);
    }
  }

  // The subject filter may repeat; every non-empty value is forwarded.
  for (const subject of toStringList(req.query.allowed_subjects)) {
    upstreamParams.append("allowed_subjects", subject);
  }

  const target = `${baseUrl}/search?${upstreamParams.toString()}`;
  await forward(target, { method: "GET" }, res);
});

export default router;
2 changes: 2 additions & 0 deletions apps/datapuller/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import enrollmentHistoriesPuller from "./pullers/enrollment";
import enrollmentTimeframePuller from "./pullers/enrollment-timeframe";
import gradeDistributionsPuller from "./pullers/grade-distributions";
import sectionsPuller from "./pullers/sections";
import semanticSearchPuller from "./pullers/semantic-search";
import termsPuller from "./pullers/terms";
import setup from "./shared";
import { Config } from "./shared/config";
Expand All @@ -30,6 +31,7 @@ const pullerMap: {
"enrollment-timeframe": enrollmentTimeframePuller.syncEnrollmentTimeframe,
"terms-all": termsPuller.allTerms,
"terms-nearby": termsPuller.nearbyTerms,
"semantic-search-refresh": semanticSearchPuller.refreshSemanticSearch,
} as const;

const runPuller = async () => {
Expand Down
66 changes: 66 additions & 0 deletions apps/datapuller/src/pullers/semantic-search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { TermModel } from "@repo/common";

import { Config } from "../shared/config";

/**
 * Asks the semantic search service to refresh its index for upcoming terms.
 *
 * Selects up to three terms whose `endDate` has not yet passed, earliest
 * `startDate` first, and POSTs `{ year, semester }` to
 * `<SEMANTIC_SEARCH_URL>/refresh` for each one in sequence. A failure for one
 * term is logged and does not prevent the remaining terms from being tried.
 *
 * @param config - Datapuller configuration; `log` and `SEMANTIC_SEARCH_URL`
 *   are read from it.
 */
const refreshSemanticSearch = async (config: Config) => {
  const { log, SEMANTIC_SEARCH_URL } = config;

  // Tolerate a trailing slash in the configured URL so the request path never
  // becomes "//refresh".
  const refreshUrl = `${SEMANTIC_SEARCH_URL.replace(/\/$/, "")}/refresh`;

  log.trace("Refreshing semantic search indices...");

  // Terms that have not ended yet, soonest-starting first, capped at three.
  const now = new Date();
  const activeTerms = await TermModel.find({
    endDate: { $gte: now },
  })
    .sort({ startDate: 1 })
    .limit(3)
    .lean();

  if (activeTerms.length === 0) {
    log.info("No active terms found to refresh.");
    return;
  }

  log.info(`Found ${activeTerms.length} active term(s) to refresh.`);

  for (const term of activeTerms) {
    const { year, semester } = term;

    try {
      log.trace(`Refreshing index for ${year} ${semester}...`);

      const response = await fetch(refreshUrl, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          year,
          semester,
        }),
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(
          `Failed to refresh ${year} ${semester}: ${response.status} ${errorText}`
        );
      }

      // The body is not validated; a null body or a missing `size` must not
      // turn a successful refresh into a logged failure.
      const result = (await response.json()) as { size?: number } | null;
      log.info(
        `Refreshed ${year} ${semester}: ${result?.size ?? "unknown"} courses indexed`
      );
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      log.error(`Error refreshing ${year} ${semester}: ${message}`);
      // Continue with other terms even if one fails
    }
  }

  log.trace("Semantic search refresh completed.");
};

export default {
refreshSemanticSearch,
};
2 changes: 2 additions & 0 deletions apps/datapuller/src/shared/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export interface Config {
WORKGROUP: string;
};
BACKEND_URL: string;
SEMANTIC_SEARCH_URL: string;
}

export function loadConfig(): Config {
Expand Down Expand Up @@ -64,5 +65,6 @@ export function loadConfig(): Config {
WORKGROUP: env("AWS_WORKGROUP"),
},
BACKEND_URL: env("BACKEND_URL"),
SEMANTIC_SEARCH_URL: env("SEMANTIC_SEARCH_URL"),
};
}
Loading