diff --git a/apps/backend/src/routes/health.js b/apps/backend/src/routes/health.js index 35926e4..ecdc42b 100644 --- a/apps/backend/src/routes/health.js +++ b/apps/backend/src/routes/health.js @@ -4,7 +4,7 @@ import path from 'path'; import unzipper from 'unzipper'; import { readTemplateManifest, addTemplateToManifest } from '../services/templateService.js'; import { TEMPLATE_DIR } from '../config/constants.js'; -import { ensureDir } from '../utils/fsUtils.js'; +import { ensureDir, listFilesRecursive } from '../utils/fsUtils.js'; import { sanitizeUploadPath } from '../utils/pathUtils.js'; import { safeJoin } from '../utils/pathUtils.js'; @@ -16,6 +16,29 @@ export function registerHealthRoutes(fastify) { return { templates, categories }; }); + fastify.get('/api/templates/:templateId/files', async (req, reply) => { + const { templateId } = req.params || {}; + if (!templateId) { + return reply.code(400).send({ error: 'templateId is required.' }); + } + + let templateRoot; + try { + templateRoot = safeJoin(TEMPLATE_DIR, templateId); + await fs.access(templateRoot); + } catch { + return reply.code(404).send({ error: `Template not found: ${templateId}` }); + } + + const allFiles = await listFilesRecursive(templateRoot); + const texFiles = allFiles + .filter(f => f.type === 'file' && f.path.toLowerCase().endsWith('.tex')) + .map(f => f.path) + .sort((a, b) => a.localeCompare(b)); + + return { files: texFiles }; + }); + fastify.post('/api/templates/upload', async (req, reply) => { await ensureDir(TEMPLATE_DIR); let templateId = ''; diff --git a/apps/backend/src/routes/transfer.js b/apps/backend/src/routes/transfer.js index afb467c..655a2d9 100644 --- a/apps/backend/src/routes/transfer.js +++ b/apps/backend/src/routes/transfer.js @@ -4,14 +4,175 @@ import { promises as fs } from 'fs'; import { buildTransferGraph } from '../services/transferAgent/graph.js'; import { buildMineruTransferGraph } from '../services/transferAgent/graphMineru.js'; import { resolveLLMConfig } from '../services/llmService.js'; -import { resolveMineruConfig } from '../services/mineruService.js'; +import { resolveMineruConfig, MINERU_MAX_FILE_BYTES } from '../services/mineruService.js'; +import { registerJobProgressSink, unregisterJobProgressSink } from '../services/transferAgent/runtimeProgress.js'; import { readTemplateManifest } from '../services/templateService.js'; import { DATA_DIR, TEMPLATE_DIR } from '../config/constants.js'; import { ensureDir, readJson, writeJson, copyDir } from '../utils/fsUtils.js'; -// In-memory job store: jobId → { graph, state, status, progressLog } +const JOB_TTL_MS = 30 * 60 * 1000; +const MAX_PROGRESS_LOG_LINES = 2000; +const TERMINAL_STATUSES = new Set(['success', 'failed', 'error']); +const LAYOUT_CHECK_ENABLED = false; + +// In-memory job store: jobId → job record const jobs = new Map(); +function normalizeProgressLog(progressLog) { + if (!progressLog) return []; + const raw = Array.isArray(progressLog) ? progressLog : [progressLog]; + return raw + .map(v => String(v || '').trim()) + .filter(Boolean); +} + +function appendProgressLog(job, progressLog) { + const lines = normalizeProgressLog(progressLog); + if (!lines.length) return; + + for (const line of lines) { + if (job.progressLog[job.progressLog.length - 1] !== line) { + job.progressLog.push(line); + } + } + + if (job.progressLog.length > MAX_PROGRESS_LOG_LINES) { + job.progressLog = job.progressLog.slice(-MAX_PROGRESS_LOG_LINES); + } +} + +function nowIso() { + return new Date().toISOString(); +} + +function isLikelyPdfUpload(fileName, mimeType) { + const normalizedName = String(fileName || '').trim().toLowerCase(); + const normalizedMime = String(mimeType || '').trim().toLowerCase(); + return normalizedName.endsWith('.pdf') || normalizedMime === 'application/pdf'; +} + +function scheduleCleanup(jobId) { + const job = jobs.get(jobId); + if (!job) return; + if (job.cleanupTimer) clearTimeout(job.cleanupTimer); + job.cleanupTimer = setTimeout(() => { + jobs.delete(jobId); + }, JOB_TTL_MS); +} + +function serializeJob(job) { + return { + status: job.status, + progressLog: job.progressLog || [], + error: job.error || null, + currentNode: job.currentNode || null, + startedAt: job.startedAt || null, + updatedAt: job.updatedAt || null, + finishedAt: job.finishedAt || null, + transferMode: job.state?.transferMode || 'legacy', + }; +} + +function mergeNodeChunk(job, chunk) { + if (!chunk || typeof chunk !== 'object') return; + const entries = Object.entries(chunk).filter(([k]) => k !== '__metadata__'); + for (const [nodeName, update] of entries) { + job.currentNode = nodeName; + if (update && typeof update === 'object' && !Array.isArray(update)) { + job.state = { ...job.state, ...update }; + appendProgressLog(job, update.progressLog); + if (update.status) job.status = update.status; + if (update.error) job.error = String(update.error); + } else { + appendProgressLog(job, `[${nodeName}] ${String(update)}`); + } + } +} + +async function executeJob(jobId, fastify) { + const job = jobs.get(jobId); + if (!job || job.running || job.status === 'waiting_upload') return; + if (TERMINAL_STATUSES.has(job.status)) return; + + job.running = true; + job.updatedAt = nowIso(); + if (!job.startedAt) job.startedAt = job.updatedAt; + if (job.status !== 'waiting_images') { + job.status = 'running'; + } + if (!job.currentNode && !job.hasStarted) { + job.currentNode = job.state?.transferMode === 'mineru' ? 'compileSource' : 'analyzeSource'; + } + job.error = null; + + const runConfig = { configurable: { thread_id: jobId } }; + const input = job.hasStarted ? null : job.state; + registerJobProgressSink(jobId, (progressLog) => { + appendProgressLog(job, progressLog); + job.updatedAt = nowIso(); + }); + + try { + const stream = await job.graph.stream(input, runConfig); + for await (const chunk of stream) { + mergeNodeChunk(job, chunk); + job.updatedAt = nowIso(); + } + + job.hasStarted = true; + + try { + if (typeof job.graph.getState === 'function') { + const snapshot = await job.graph.getState(runConfig); + const values = snapshot?.values; + if (values && typeof values === 'object') { + job.state = { ...job.state, ...values }; + const snapshotLines = normalizeProgressLog(values.progressLog); + if (snapshotLines.length > job.progressLog.length) { + job.progressLog = snapshotLines.slice(-MAX_PROGRESS_LOG_LINES); + } + if (values.status) job.status = values.status; + if (values.error) job.error = String(values.error); + } + } + } catch { + // Ignore state snapshot read errors. + } + + if (!job.status || job.status === 'running' || job.status === 'pending') { + const nextStatus = job.state?.status; + if (nextStatus) job.status = nextStatus; + } + + if (TERMINAL_STATUSES.has(job.status)) { + job.finishedAt = nowIso(); + scheduleCleanup(jobId); + } + } catch (err) { + const msg = err?.message || String(err || 'Unknown error'); + job.status = 'error'; + job.error = msg; + job.finishedAt = nowIso(); + appendProgressLog(job, `[job] Error: ${msg}`); + scheduleCleanup(jobId); + fastify.log.error({ err, jobId }, 'Transfer job execution failed'); + } finally { + unregisterJobProgressSink(jobId); + job.running = false; + job.updatedAt = nowIso(); + } +} + +function scheduleJobRun(jobId, fastify) { + const job = jobs.get(jobId); + if (!job || job.running) return; + setImmediate(() => { + executeJob(jobId, fastify).catch((err) => { + fastify.log.error({ err, jobId }, 'Failed to schedule transfer job run'); + }); + }); +} + export function registerTransferRoutes(fastify) { /** @@ -29,6 +190,7 @@ export function registerTransferRoutes(fastify) { layoutCheck = false, llmConfig, } = request.body || {}; + const effectiveLayoutCheck = LAYOUT_CHECK_ENABLED && Boolean(layoutCheck); if (!sourceProjectId || !sourceMainFile || !targetTemplateId || !targetMainFile) { return reply.code(400).send({ error: 'Missing required fields.' }); @@ -40,6 +202,15 @@ export function registerTransferRoutes(fastify) { if (!template) { return reply.code(400).send({ error: `Unknown template: ${targetTemplateId}` }); } + const templateRoot = path.join(TEMPLATE_DIR, targetTemplateId); + const templateMainAbs = path.join(templateRoot, targetMainFile); + try { + await fs.access(templateMainAbs); + } catch { + return reply.code(400).send({ + error: `Template main file not found: ${targetMainFile} (template: ${targetTemplateId})`, + }); + } // Create a new project from the template await ensureDir(DATA_DIR); @@ -62,12 +233,15 @@ export function registerTransferRoutes(fastify) { await writeJson(path.join(projectRoot, 'project.json'), meta); // Copy template files into the new project - const templateRoot = path.join(TEMPLATE_DIR, targetTemplateId); await copyDir(templateRoot, projectRoot); // Build transfer graph const jobId = crypto.randomUUID(); const graph = buildTransferGraph(); + const initialProgressLog = []; + if (layoutCheck && !effectiveLayoutCheck) { + initialProgressLog.push('[start] Layout check is temporarily disabled. Proceeding without VLM review.'); + } const initialState = { sourceProjectId, @@ -75,8 +249,9 @@ export function registerTransferRoutes(fastify) { targetProjectId: newProjectId, targetMainFile, engine, - layoutCheck, + layoutCheck: effectiveLayoutCheck, llmConfig: resolveLLMConfig(llmConfig), + transferMode: 'legacy', jobId, }; @@ -84,55 +259,42 @@ export function registerTransferRoutes(fastify) { graph, state: initialState, status: 'pending', - progressLog: [], + progressLog: initialProgressLog, hasStarted: false, - iterator: null, + running: false, + error: null, + currentNode: null, + startedAt: null, + updatedAt: nowIso(), + finishedAt: null, + cleanupTimer: null, }); + scheduleJobRun(jobId, fastify); return { jobId, newProjectId }; }); /** * POST /api/transfer/step * Body: { jobId } - * Runs the graph one step forward. - * Returns: { status, currentNode, progressLog } + * Compatibility route for older clients. + * Starts background execution if needed and returns current status. */ - fastify.post('/api/transfer/step', async (request, reply) => { + fastify.post('/api/transfer/step', { logLevel: 'warn' }, async (request, reply) => { const { jobId } = request.body || {}; const job = jobs.get(jobId); if (!job) { return reply.code(404).send({ error: 'Job not found.' }); } - // If waiting for images, don't proceed - if (job.status === 'waiting_images') { - return { status: 'waiting_images', progressLog: job.progressLog }; + if (!job.running + && !TERMINAL_STATUSES.has(job.status) + && job.status !== 'waiting_upload' + && job.status !== 'waiting_images') { + scheduleJobRun(jobId, fastify); } - try { - job.status = 'running'; - const runConfig = { configurable: { thread_id: jobId } }; - const input = job.hasStarted ? null : job.state; - const result = await job.graph.invoke(input, runConfig); - job.hasStarted = true; - job.state = result; - job.progressLog = result.progressLog || []; - job.status = result.status || 'running'; - - return { - status: job.status, - progressLog: job.progressLog, - }; - } catch (err) { - const msg = err?.message || String(err || 'Unknown error'); - job.status = 'error'; - job.error = msg; - return reply.code(500).send({ - error: msg, - progressLog: job.progressLog, - }); - } + return serializeJob(job); }); /** @@ -151,7 +313,7 @@ export function registerTransferRoutes(fastify) { return reply.code(400).send({ error: 'Job is not waiting for images.' }); } - // Inject images into checkpointed state so the next /step can resume from checkLayout. + // Inject images into checkpointed state so background execution can resume from checkLayout. const updated = { pageImages: images || [], status: 'running' }; try { if (job.hasStarted && typeof job.graph.updateState === 'function') { @@ -164,7 +326,10 @@ export function registerTransferRoutes(fastify) { // Fallback to in-memory state mutation if checkpoint update fails. } job.state = { ...job.state, ...updated }; - job.status = 'running'; + job.status = 'pending'; + job.updatedAt = nowIso(); + appendProgressLog(job, `[submit-images] Received ${Array.isArray(images) ? images.length : 0} page images, resuming transfer.`); + scheduleJobRun(jobId, fastify); return { ok: true }; }); @@ -173,17 +338,13 @@ export function registerTransferRoutes(fastify) { * GET /api/transfer/status/:jobId * Returns current job status and progress log. */ - fastify.get('/api/transfer/status/:jobId', async (request, reply) => { + fastify.get('/api/transfer/status/:jobId', { logLevel: 'warn' }, async (request, reply) => { const job = jobs.get(request.params.jobId); if (!job) { return reply.code(404).send({ error: 'Job not found.' }); } - return { - status: job.status, - progressLog: job.progressLog, - error: job.error || null, - }; + return serializeJob(job); }); /** @@ -204,6 +365,7 @@ export function registerTransferRoutes(fastify) { llmConfig, mineruConfig, } = request.body || {}; + const effectiveLayoutCheck = LAYOUT_CHECK_ENABLED && Boolean(layoutCheck); if (!targetTemplateId || !targetMainFile) { return reply.code(400).send({ error: 'Missing targetTemplateId or targetMainFile.' }); @@ -220,6 +382,15 @@ export function registerTransferRoutes(fastify) { if (!template) { return reply.code(400).send({ error: `Unknown template: ${targetTemplateId}` }); } + const templateRoot = path.join(TEMPLATE_DIR, targetTemplateId); + const templateMainAbs = path.join(templateRoot, targetMainFile); + try { + await fs.access(templateMainAbs); + } catch { + return reply.code(400).send({ + error: `Template main file not found: ${targetMainFile} (template: ${targetTemplateId})`, + }); + } // Create new project from template await ensureDir(DATA_DIR); @@ -242,12 +413,22 @@ export function registerTransferRoutes(fastify) { }; await writeJson(path.join(projectRoot, 'project.json'), meta); - const templateRoot = path.join(TEMPLATE_DIR, targetTemplateId); await copyDir(templateRoot, projectRoot); // Build MinerU transfer graph const jobId = crypto.randomUUID(); const graph = buildMineruTransferGraph(); + const resolvedMineruConfig = resolveMineruConfig(mineruConfig); + if (!resolvedMineruConfig.token) { + return reply.code(400).send({ error: 'MinerU token not configured.' }); + } + const initialProgressLog = []; + if (layoutCheck && !effectiveLayoutCheck) { + initialProgressLog.push('[start-mineru] Layout check is temporarily disabled. Proceeding without VLM review.'); + } + if (!sourceProjectId) { + initialProgressLog.push('[start-mineru] Waiting for PDF upload before execution.'); + } const initialState = { sourceProjectId: sourceProjectId || '', @@ -255,9 +436,9 @@ export function registerTransferRoutes(fastify) { targetProjectId: newProjectId, targetMainFile, engine, - layoutCheck, + layoutCheck: effectiveLayoutCheck, llmConfig: resolveLLMConfig(llmConfig), - mineruConfig: resolveMineruConfig(mineruConfig), + mineruConfig: resolvedMineruConfig, transferMode: 'mineru', jobId, }; @@ -265,12 +446,22 @@ export function registerTransferRoutes(fastify) { jobs.set(jobId, { graph, state: initialState, - status: 'pending', - progressLog: [], + status: sourceProjectId ? 'pending' : 'waiting_upload', + progressLog: initialProgressLog, hasStarted: false, - iterator: null, + running: false, + error: null, + currentNode: null, + startedAt: null, + updatedAt: nowIso(), + finishedAt: null, + cleanupTimer: null, }); + if (sourceProjectId) { + scheduleJobRun(jobId, fastify); + } + return { jobId, newProjectId }; }); @@ -283,13 +474,24 @@ export function registerTransferRoutes(fastify) { const parts = request.parts(); let jobId = ''; let pdfBuffer = null; + let pdfFileName = ''; + let pdfMimeType = ''; for await (const part of parts) { if (part.fieldname === 'jobId' && part.type === 'field') { jobId = part.value; } else if (part.fieldname === 'pdf' && part.type === 'file') { + pdfFileName = part.filename || ''; + pdfMimeType = part.mimetype || ''; const chunks = []; + let totalBytes = 0; for await (const chunk of part.file) { + totalBytes += chunk.length; + if (totalBytes > MINERU_MAX_FILE_BYTES) { + return reply.code(400).send({ + error: `PDF exceeds MinerU upload limit (${MINERU_MAX_FILE_BYTES} bytes).`, + }); + } chunks.push(chunk); } pdfBuffer = Buffer.concat(chunks); @@ -311,6 +513,12 @@ export function registerTransferRoutes(fastify) { if (!pdfBuffer) { return reply.code(400).send({ error: 'No PDF file uploaded.' }); } + if (!isLikelyPdfUpload(pdfFileName, pdfMimeType)) { + return reply.code(400).send({ error: 'Uploaded file must be a PDF.' }); + } + if (!pdfBuffer.length) { + return reply.code(400).send({ error: 'Uploaded PDF is empty.' }); + } // Save PDF to target project directory const pdfPath = path.join(job.state.targetProjectId @@ -321,6 +529,10 @@ export function registerTransferRoutes(fastify) { // Set sourcePdfPath in state so compileSource skips compilation job.state.sourcePdfPath = pdfPath; + job.status = 'pending'; + job.updatedAt = nowIso(); + appendProgressLog(job, `[upload-pdf] Uploaded source PDF (${pdfBuffer.length} bytes).`); + scheduleJobRun(jobId, fastify); return { ok: true, pdfPath }; }); diff --git a/apps/backend/src/services/mineruService.js b/apps/backend/src/services/mineruService.js index b7cc77f..ba56676 100644 --- a/apps/backend/src/services/mineruService.js +++ b/apps/backend/src/services/mineruService.js @@ -4,7 +4,7 @@ import { MINERU_API_BASE, MINERU_POLL_INTERVAL_MS, MINERU_MAX_POLL_ATTEMPTS } fr import { ensureDir } from '../utils/fsUtils.js'; import { safeJoin } from '../utils/pathUtils.js'; -const MINERU_MAX_FILE_BYTES = 200 * 1024 * 1024; +export const MINERU_MAX_FILE_BYTES = 200 * 1024 * 1024; /** * Resolve MinerU configuration from request config or environment variables. @@ -224,7 +224,7 @@ async function downloadAndExtractZip(zipUrl, outputDir) { * _content_list.json (or similar) */ async function parseExtractedOutput(outputDir) { - const markdownPath = await findFirstFileRecursive(outputDir, p => p.toLowerCase().endsWith('.md')); + const { markdownPath, selectionReason } = await selectMarkdownFile(outputDir); if (!markdownPath) { throw new Error('MinerU output missing markdown file'); } @@ -245,40 +245,81 @@ async function parseExtractedOutput(outputDir) { images.push(...fallback); } - return { markdownContent, images, searchDir }; + return { markdownContent, images, searchDir, markdownPath, selectionReason }; } -async function findFirstFileRecursive(rootDir, predicate) { +async function selectMarkdownFile(outputDir) { + const preferred = path.join(outputDir, 'full.md'); + try { + const text = await fs.readFile(preferred, 'utf8'); + if (text.trim()) { + return { markdownPath: preferred, selectionReason: 'preferred-full-md' }; + } + } catch { + // Ignore missing preferred file. + } + + const candidates = await findFilesRecursive(outputDir, p => p.toLowerCase().endsWith('.md')); + if (!candidates.length) { + return { markdownPath: '', selectionReason: 'none' }; + } + + const ranked = []; + for (const candidate of candidates) { + try { + const stat = await fs.stat(candidate); + ranked.push({ path: candidate, size: stat.size || 0 }); + } catch { + // Ignore unreadable files. + } + } + ranked.sort((a, b) => b.size - a.size); + + for (const item of ranked) { + try { + const text = await fs.readFile(item.path, 'utf8'); + if (text.trim()) { + return { markdownPath: item.path, selectionReason: 'fallback-largest-md' }; + } + } catch { + // Ignore unreadable files. + } + } + + return { markdownPath: '', selectionReason: 'none' }; +} + +async function findFilesRecursive(rootDir, predicate) { + const out = []; const entries = await fs.readdir(rootDir, { withFileTypes: true }); for (const entry of entries) { const abs = path.join(rootDir, entry.name); - if (entry.isFile() && predicate(abs)) return abs; + if (entry.isFile() && predicate(abs)) out.push(abs); } for (const entry of entries) { if (!entry.isDirectory()) continue; - const found = await findFirstFileRecursive(path.join(rootDir, entry.name), predicate); - if (found) return found; + out.push(...await findFilesRecursive(path.join(rootDir, entry.name), predicate)); } - return ''; + return out; } function isImageFilePath(filePath) { return /\.(png|jpe?g|gif|webp|bmp|svg)$/i.test(filePath); } -async function listImageFilesRecursive(imagesDir) { +async function listImageFilesRecursive(imagesDir, baseDir = imagesDir) { const out = []; try { const entries = await fs.readdir(imagesDir, { withFileTypes: true }); for (const entry of entries) { const abs = path.join(imagesDir, entry.name); if (entry.isDirectory()) { - out.push(...await listImageFilesRecursive(abs)); + out.push(...await listImageFilesRecursive(abs, baseDir)); continue; } if (entry.isFile() && isImageFilePath(abs)) { out.push({ - name: path.basename(abs), + name: path.relative(baseDir, abs).replace(/\\/g, '/'), localPath: abs, }); } diff --git a/apps/backend/src/services/transferAgent/graphMineru.js b/apps/backend/src/services/transferAgent/graphMineru.js index fc2f21e..87611f2 100644 --- a/apps/backend/src/services/transferAgent/graphMineru.js +++ b/apps/backend/src/services/transferAgent/graphMineru.js @@ -45,8 +45,8 @@ function afterLayoutCheck(state) { * Build the MinerU-based transfer workflow. * * Flow: - * compileSource → parsePdfWithMineru → analyzeTarget → applyTransfer - * → copyAssets → compile → [fixCompile loop] + * compileSource → parsePdfWithMineru → analyzeTarget → copyAssets + * → applyTransfer → compile → [fixCompile loop] * → [requestPageImages → checkLayout → fixLayout loop] → finalize */ export function buildMineruTransferGraph() { @@ -71,9 +71,9 @@ export function buildMineruTransferGraph() { // Linear edges graph.addEdge('compileSource', 'parsePdfWithMineru'); graph.addEdge('parsePdfWithMineru', 'analyzeTarget'); - graph.addEdge('analyzeTarget', 'applyTransfer'); - graph.addEdge('applyTransfer', 'copyAssets'); - graph.addEdge('copyAssets', 'compile'); + graph.addEdge('analyzeTarget', 'copyAssets'); + graph.addEdge('copyAssets', 'applyTransfer'); + graph.addEdge('applyTransfer', 'compile'); // Conditional: after compile graph.addConditionalEdges('compile', afterCompile, { diff --git a/apps/backend/src/services/transferAgent/nodes/analyzeSource.js b/apps/backend/src/services/transferAgent/nodes/analyzeSource.js index 50b31a4..76894e7 100644 --- a/apps/backend/src/services/transferAgent/nodes/analyzeSource.js +++ b/apps/backend/src/services/transferAgent/nodes/analyzeSource.js @@ -1,44 +1,8 @@ import { promises as fs } from 'fs'; import path from 'path'; import { getProjectRoot } from '../../projectService.js'; -import { safeJoin } from '../../../utils/pathUtils.js'; import { listFilesRecursive } from '../../../utils/fsUtils.js'; -import { isTextFile } from '../../../utils/texUtils.js'; - -/** - * Recursively resolve \input{} and \include{} references, - * returning the concatenated full content. - */ -async function resolveInputs(projectRoot, relPath, visited = new Set()) { - if (visited.has(relPath)) return ''; - visited.add(relPath); - - const absPath = safeJoin(projectRoot, relPath); - let content; - try { - content = await fs.readFile(absPath, 'utf8'); - } catch { - return ''; - } - - // Match \input{...} and \include{...} - const pattern = /\\(?:input|include)\{([^}]+)\}/g; - let result = ''; - let lastIndex = 0; - let match; - - while ((match = pattern.exec(content)) !== null) { - result += content.slice(lastIndex, match.index); - let ref = match[1].trim(); - // Add .tex extension if missing - if (!path.extname(ref)) ref += '.tex'; - const childContent = await resolveInputs(projectRoot, ref, visited); - result += childContent; - lastIndex = pattern.lastIndex; - } - result += content.slice(lastIndex); - return result; -} +import { resolveTexInputs } from '../utils.js'; /** * Parse section/subsection outline from LaTeX content. @@ -96,7 +60,7 @@ export async function analyzeSource(state) { const allFiles = await listFilesRecursive(projectRoot); // Resolve all \input/\include and get full content - const fullContent = await resolveInputs(projectRoot, state.sourceMainFile); + const fullContent = await resolveTexInputs(projectRoot, state.sourceMainFile); const outline = parseOutline(fullContent); const assets = collectAssets(fullContent, allFiles); diff --git a/apps/backend/src/services/transferAgent/nodes/analyzeTarget.js b/apps/backend/src/services/transferAgent/nodes/analyzeTarget.js index e29bb89..aab4a84 100644 --- a/apps/backend/src/services/transferAgent/nodes/analyzeTarget.js +++ b/apps/backend/src/services/transferAgent/nodes/analyzeTarget.js @@ -1,40 +1,5 @@ -import { promises as fs } from 'fs'; -import path from 'path'; import { getProjectRoot } from '../../projectService.js'; -import { safeJoin } from '../../../utils/pathUtils.js'; -import { listFilesRecursive } from '../../../utils/fsUtils.js'; - -/** - * Recursively resolve \input{} and \include references. - */ -async function resolveInputs(projectRoot, relPath, visited = new Set()) { - if (visited.has(relPath)) return ''; - visited.add(relPath); - - const absPath = safeJoin(projectRoot, relPath); - let content; - try { - content = await fs.readFile(absPath, 'utf8'); - } catch { - return ''; - } - - const pattern = /\\(?:input|include)\{([^}]+)\}/g; - let result = ''; - let lastIndex = 0; - let match; - - while ((match = pattern.exec(content)) !== null) { - result += content.slice(lastIndex, match.index); - let ref = match[1].trim(); - if (!path.extname(ref)) ref += '.tex'; - const childContent = await resolveInputs(projectRoot, ref, visited); - result += childContent; - lastIndex = pattern.lastIndex; - } - result += content.slice(lastIndex); - return result; -} +import { resolveTexInputs } from '../utils.js'; /** * Extract preamble (everything before \begin{document}). @@ -66,7 +31,10 @@ function parseOutline(content) { export async function analyzeTarget(state) { const projectRoot = await getProjectRoot(state.targetProjectId); - const fullContent = await resolveInputs(projectRoot, state.targetMainFile); + const fullContent = await resolveTexInputs(projectRoot, state.targetMainFile, { strictRoot: true }); + if (!fullContent.trim()) { + throw new Error(`[analyzeTarget] Target template content is empty: ${state.targetMainFile}`); + } const preamble = extractPreamble(fullContent); const outline = parseOutline(fullContent); diff --git a/apps/backend/src/services/transferAgent/nodes/applyTransfer.js b/apps/backend/src/services/transferAgent/nodes/applyTransfer.js index 0c67c88..e7e9ce5 100644 --- a/apps/backend/src/services/transferAgent/nodes/applyTransfer.js +++ b/apps/backend/src/services/transferAgent/nodes/applyTransfer.js @@ -1,8 +1,6 @@ -import { promises as fs } from 'fs'; import { ChatOpenAI } from '@langchain/openai'; import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; -import { safeJoin } from '../../../utils/pathUtils.js'; -import { writeFileWithSnapshot, stripCodeFences } from '../utils.js'; +import { writeFileWithSnapshot, stripCodeFences, invokeLLMTextWithDebug } from '../utils.js'; /** * Build the LLM prompt for content migration. @@ -42,7 +40,7 @@ Output ONLY the complete LaTeX file content. No explanations, no markdown fences */ function buildMineruTransferPrompt(state) { const imageList = (state.sourceImages || []) - .map(img => img.name) + .map(img => img.targetPath || `images/${img.name}`) .join(', '); return `You are a LaTeX template filling expert. @@ -58,13 +56,15 @@ ${state.targetTemplateContent} ## IMAGE FILES AVAILABLE: ${imageList || '(none)'} +The Markdown image references have already been normalized to the target project's image paths. + ## RULES: 1. Keep the target preamble (everything before \\begin{document}) EXACTLY as-is 2. Only modify content between \\begin{document} and \\end{document} 3. Map Markdown headings to the corresponding \\section{}, \\subsection{} etc. in the template 4. Formulas in the Markdown are already in LaTeX format ($...$ or $$...$$) — preserve them as-is 5. Convert HTML tables in the Markdown to LaTeX \\begin{tabular} environments -6. For images referenced in the Markdown, use \\includegraphics{images/} wrapped in \\begin{figure}...\\end{figure} +6. For images referenced in the Markdown, preserve the normalized image path exactly in \\includegraphics{...} and wrap figures in \\begin{figure}...\\end{figure} 7. Preserve ALL text content — do not omit any paragraphs or sections 8. Do NOT add content that doesn't exist in the Markdown 9. Output the COMPLETE .tex file content, not just the body @@ -86,8 +86,13 @@ async function applyTransferLegacy(state) { }); const prompt = buildTransferPrompt(state); - const response = await llm.invoke([{ role: 'user', content: prompt }]); - const newContent = stripCodeFences(response.content); + const { text, progressLog } = await invokeLLMTextWithDebug({ + llm, + messages: [{ role: 'user', content: prompt }], + state, + nodeName: 'applyTransfer', + }); + const newContent = stripCodeFences(text); await writeFileWithSnapshot( state.targetProjectRoot, @@ -97,7 +102,10 @@ async function applyTransferLegacy(state) { ); return { - progressLog: `[applyTransfer] Wrote migrated content to ${state.targetMainFile} (${newContent.length} chars).`, + progressLog: [ + ...progressLog, + `[applyTransfer] Wrote migrated content to ${state.targetMainFile} (${newContent.length} chars).`, + ], }; } @@ -115,8 +123,13 @@ async function applyTransferMineru(state) { }); const prompt = buildMineruTransferPrompt(state); - const response = await llm.invoke([{ role: 'user', content: prompt }]); - const newContent = stripCodeFences(response.content); + const { text, progressLog } = await invokeLLMTextWithDebug({ + llm, + messages: [{ role: 'user', content: prompt }], + state, + nodeName: 'applyTransfer:mineru', + }); + const newContent = stripCodeFences(text); await writeFileWithSnapshot( state.targetProjectRoot, @@ -126,7 +139,10 @@ async function applyTransferMineru(state) { ); return { - progressLog: `[applyTransfer:mineru] Wrote content to ${state.targetMainFile} (${newContent.length} chars).`, + progressLog: [ + ...progressLog, + `[applyTransfer:mineru] Wrote content to ${state.targetMainFile} (${newContent.length} chars).`, + ], }; } diff --git a/apps/backend/src/services/transferAgent/nodes/copyAssets.js b/apps/backend/src/services/transferAgent/nodes/copyAssets.js index b0f9cd1..4e3b75b 100644 --- a/apps/backend/src/services/transferAgent/nodes/copyAssets.js +++ b/apps/backend/src/services/transferAgent/nodes/copyAssets.js @@ -37,6 +37,41 @@ async function copySingleAsset(srcRoot, destRoot, relPath) { return { path: relPath, destRel, status: destRel !== relPath ? 'conflict' : 'copied' }; } +function escapeRegExp(text) { + return String(text || '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function rewriteMineruMarkdownImageRefs(markdown, manifest) { + if (!markdown || !manifest.length) return markdown; + + const rewriteUrl = (rawUrl) => { + const normalizedUrl = String(rawUrl || '').replace(/\\/g, '/'); + for (const item of manifest) { + const sourceName = String(item.sourceName || '').replace(/\\/g, '/'); + if (!sourceName) continue; + if ( + normalizedUrl === sourceName + || normalizedUrl === `./${sourceName}` + || normalizedUrl.endsWith(`/${sourceName}`) + || path.posix.basename(normalizedUrl) === path.posix.basename(sourceName) + ) { + return item.targetPath; + } + } + return rawUrl; + }; + + let next = markdown.replace(/(!\[[^\]]*\]\()([^) \t]+)([^)]*\))/g, (match, prefix, url, suffix) => { + return `${prefix}${rewriteUrl(url)}${suffix}`; + }); + + next = next.replace(/(]*\bsrc=["'])([^"']+)(["'][^>]*>)/gi, (match, prefix, url, suffix) => { + return `${prefix}${rewriteUrl(url)}${suffix}`; + }); + + return next; +} + /** * Legacy mode: copy bib files, images, and style files from source project. */ @@ -78,16 +113,38 @@ async function copyAssetsLegacy(state) { async function copyAssetsMineru(state) { const images = state.sourceImages || []; let copiedCount = 0; + let renamedCount = 0; + const copiedImages = []; // Copy MinerU-extracted images to target project images/ const imagesDir = path.join(state.targetProjectRoot, 'images'); await ensureDir(imagesDir); + const usedNames = new Set(); for (const img of images) { - const destPath = path.join(imagesDir, img.name); + const sourceName = String(img.name || path.basename(img.localPath || 'image')).replace(/\\/g, '/'); + const originalName = path.basename(sourceName); + const ext = path.extname(originalName); + const stem = ext ? originalName.slice(0, -ext.length) : originalName; + let finalName = originalName; + let suffix = 2; + while (usedNames.has(finalName) || await fileExists(path.join(imagesDir, finalName))) { + finalName = `${stem}__dup${suffix}${ext}`; + suffix++; + } + usedNames.add(finalName); + if (finalName !== originalName) renamedCount++; + + const destPath = path.join(imagesDir, finalName); if (await fileExists(img.localPath)) { await fs.copyFile(img.localPath, destPath); copiedCount++; + copiedImages.push({ + ...img, + sourceName, + name: finalName, + targetPath: `images/${finalName}`, + }); } } @@ -110,8 +167,12 @@ async function copyAssetsMineru(state) { } } + const rewrittenMarkdown = rewriteMineruMarkdownImageRefs(state.sourceMarkdown || '', copiedImages); + return { - progressLog: `[copyAssets:mineru] Copied ${copiedCount} images, ${bibCount} bib files.`, + sourceImages: copiedImages, + sourceMarkdown: rewrittenMarkdown, + progressLog: `[copyAssets:mineru] Copied ${copiedCount} images (${renamedCount} renamed to avoid conflicts), ${bibCount} bib files.`, }; } diff --git a/apps/backend/src/services/transferAgent/nodes/fixCompile.js b/apps/backend/src/services/transferAgent/nodes/fixCompile.js index 8d1e1da..f4d27f7 100644 --- a/apps/backend/src/services/transferAgent/nodes/fixCompile.js +++ b/apps/backend/src/services/transferAgent/nodes/fixCompile.js @@ -2,7 +2,7 @@ import { promises as fs } from 'fs'; import { ChatOpenAI } from '@langchain/openai'; import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; import { safeJoin } from '../../../utils/pathUtils.js'; -import { writeFileWithSnapshot, stripCodeFences } from '../utils.js'; +import { writeFileWithSnapshot, stripCodeFences, invokeLLMTextWithDebug } from '../utils.js'; const MAX_LOG_TAIL = 8000; @@ -43,8 +43,13 @@ Common fixes: Output ONLY the complete corrected LaTeX file. No explanations, no markdown fences.`; - const response = await llm.invoke([{ role: 'user', content: prompt }]); - const fixed = stripCodeFences(response.content); + const { text, progressLog } = await invokeLLMTextWithDebug({ + llm, + messages: [{ role: 'user', content: prompt }], + state, + nodeName: 'fixCompile', + }); + const fixed = stripCodeFences(text); await writeFileWithSnapshot( state.targetProjectRoot, @@ -54,6 +59,9 @@ Output ONLY the complete corrected LaTeX file. No explanations, no markdown fenc ); return { - progressLog: `[fixCompile] Applied LLM fix for compile attempt ${state.compileAttempt}.`, + progressLog: [ + ...progressLog, + `[fixCompile] Applied LLM fix for compile attempt ${state.compileAttempt}.`, + ], }; } diff --git a/apps/backend/src/services/transferAgent/nodes/fixLayout.js b/apps/backend/src/services/transferAgent/nodes/fixLayout.js index 05e6ff5..c7e396e 100644 --- a/apps/backend/src/services/transferAgent/nodes/fixLayout.js +++ b/apps/backend/src/services/transferAgent/nodes/fixLayout.js @@ -2,7 +2,7 @@ import { promises as fs } from 'fs'; import { ChatOpenAI } from '@langchain/openai'; import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; import { safeJoin } from '../../../utils/pathUtils.js'; -import { writeFileWithSnapshot, stripCodeFences } from '../utils.js'; +import { writeFileWithSnapshot, stripCodeFences, invokeLLMTextWithDebug } from '../utils.js'; /** * fixLayout node — LLM reads current main.tex + VLM layout issues, @@ -42,8 +42,13 @@ Common layout fixes: Output ONLY the complete corrected LaTeX file. No explanations, no markdown fences.`; - const response = await llm.invoke([{ role: 'user', content: prompt }]); - const fixed = stripCodeFences(response.content); + const { text, progressLog } = await invokeLLMTextWithDebug({ + llm, + messages: [{ role: 'user', content: prompt }], + state, + nodeName: 'fixLayout', + }); + const fixed = stripCodeFences(text); await writeFileWithSnapshot( state.targetProjectRoot, @@ -53,6 +58,9 @@ Output ONLY the complete corrected LaTeX file. No explanations, no markdown fenc ); return { - progressLog: `[fixLayout] Applied LLM fix for ${issues.length} layout issues (attempt ${state.layoutAttempt}).`, + progressLog: [ + ...progressLog, + `[fixLayout] Applied LLM fix for ${issues.length} layout issues (attempt ${state.layoutAttempt}).`, + ], }; } diff --git a/apps/backend/src/services/transferAgent/nodes/parsePdfWithMineru.js b/apps/backend/src/services/transferAgent/nodes/parsePdfWithMineru.js index b5a67a4..a3ff01f 100644 --- a/apps/backend/src/services/transferAgent/nodes/parsePdfWithMineru.js +++ b/apps/backend/src/services/transferAgent/nodes/parsePdfWithMineru.js @@ -2,6 +2,7 @@ import path from 'path'; import { parsePdfWithMineru as callMineru } from '../../mineruService.js'; import { ensureDir } from '../../../utils/fsUtils.js'; import { getProjectRoot } from '../../projectService.js'; +import { pushJobProgress } from '../runtimeProgress.js'; /** * parsePdfWithMineru node — calls MinerU API to parse the source PDF @@ -12,20 +13,36 @@ export async function parsePdfWithMineru(state) { const outputDir = path.join(targetProjectRoot, '_mineru_output'); await ensureDir(outputDir); + let lastProgressKey = ''; + const onProgress = (info) => { + if (!info || typeof info !== 'object') return; + const phase = info.phase || info.state || 'unknown'; + const pageInfo = (typeof info.extractedPages === 'number' && typeof info.totalPages === 'number') + ? ` (${info.extractedPages}/${info.totalPages} pages)` + : ''; + const key = `${phase}:${info.state || ''}:${info.extractedPages || ''}:${info.totalPages || ''}`; + if (key === lastProgressKey) return; + lastProgressKey = key; + pushJobProgress(state.jobId, `[parsePdfWithMineru] MinerU phase: ${phase}${pageInfo}`); + }; + const result = await callMineru( state.sourcePdfPath, state.mineruConfig, outputDir, + onProgress, ); const mdLen = (result.markdownContent || '').length; const imgCount = (result.images || []).length; + const mdPath = result.markdownPath || '(unknown)'; + const mdReason = result.selectionReason ? `, selection=${result.selectionReason}` : ''; return { sourceMarkdown: result.markdownContent, sourceImages: result.images || [], targetProjectRoot, mineruOutputDir: outputDir, - progressLog: `[parsePdfWithMineru] Parsed PDF: ${mdLen} chars markdown, ${imgCount} images.`, + progressLog: `[parsePdfWithMineru] Parsed PDF: ${mdLen} chars markdown, ${imgCount} images, markdown=${mdPath}${mdReason}.`, }; } diff --git a/apps/backend/src/services/transferAgent/runtimeProgress.js b/apps/backend/src/services/transferAgent/runtimeProgress.js new file mode 100644 index 0000000..b6e2482 --- /dev/null +++ b/apps/backend/src/services/transferAgent/runtimeProgress.js @@ -0,0 +1,21 @@ +const sinks = new Map(); + +export function registerJobProgressSink(jobId, sink) { + if (!jobId || typeof sink !== 'function') return; + sinks.set(jobId, sink); +} + +export function unregisterJobProgressSink(jobId) { + if (!jobId) return; + sinks.delete(jobId); +} + +export function pushJobProgress(jobId, progressLog) { + const sink = sinks.get(jobId); + if (!sink) return; + try { + sink(progressLog); + } catch { + // Ignore sink failures. + } +} diff --git a/apps/backend/src/services/transferAgent/utils.js b/apps/backend/src/services/transferAgent/utils.js index 5d3d335..2fffe1d 100644 --- a/apps/backend/src/services/transferAgent/utils.js +++ b/apps/backend/src/services/transferAgent/utils.js @@ -1,4 +1,5 @@ import { promises as fs } from 'fs'; +import crypto from 'crypto'; import path from 'path'; import { ensureDir } from '../../utils/fsUtils.js'; import { safeJoin } from '../../utils/pathUtils.js'; @@ -98,6 +99,59 @@ export function validateSchema(obj, schema) { return errors.length ? { valid: false, errors } : { valid: true }; } +// --------------------------------------------------------------------------- +// LaTeX file resolution +// --------------------------------------------------------------------------- + +function normalizeTexRelPath(relPath) { + return path.posix + .normalize(String(relPath || '').replace(/\\/g, '/')) + .replace(/^(\.\/)+/, '') + .replace(/^\/+/, ''); +} + +async function resolveTexInputsInner(projectRoot, relPath, visited, strictCurrent) { + const normalizedRelPath = normalizeTexRelPath(relPath); + if (!normalizedRelPath) return ''; + if (visited.has(normalizedRelPath)) return ''; + visited.add(normalizedRelPath); + + const absPath = safeJoin(projectRoot, normalizedRelPath); + let content; + try { + content = await fs.readFile(absPath, 'utf8'); + } catch (err) { + if (strictCurrent) { + throw new Error(`Failed to read TeX file "${normalizedRelPath}": ${err?.message || 'not found'}`); + } + return ''; + } + + const baseDir = path.posix.dirname(normalizedRelPath); + const pattern = /\\(?:input|include)\{([^}]+)\}/g; + let result = ''; + let lastIndex = 0; + let match; + + while ((match = pattern.exec(content)) !== null) { + result += content.slice(lastIndex, match.index); + let ref = match[1].trim(); + if (!path.posix.extname(ref)) ref += '.tex'; + const childRelPath = normalizeTexRelPath(path.posix.join(baseDir, ref)); + const childContent = await resolveTexInputsInner(projectRoot, childRelPath, visited, false); + result += childContent; + lastIndex = pattern.lastIndex; + } + + result += content.slice(lastIndex); + return result; +} + +export async function resolveTexInputs(projectRoot, relPath, opts = {}) { + const visited = opts.visited instanceof Set ? opts.visited : new Set(); + return resolveTexInputsInner(projectRoot, relPath, visited, Boolean(opts.strictRoot)); +} + // --------------------------------------------------------------------------- // Retryable LLM JSON call // --------------------------------------------------------------------------- @@ -182,3 +236,155 @@ export async function writeFileWithSnapshot(projectRoot, relPath, content, jobId await ensureDir(path.dirname(absPath)); await fs.writeFile(absPath, content, 'utf8'); } + +// --------------------------------------------------------------------------- +// LLM debug logging +// --------------------------------------------------------------------------- + +const DEBUG_LEVELS = new Set(['off', 'meta', 'preview', 'full']); + +function getTransferDebugConfig() { + const requested = String(process.env.OPENPRISM_TRANSFER_DEBUG_LEVEL || 'preview') + .trim() + .toLowerCase(); + const level = DEBUG_LEVELS.has(requested) ? requested : 'preview'; + const allowFull = String(process.env.OPENPRISM_TRANSFER_DEBUG_FULL || '') + .trim() + .toLowerCase() === 'true'; + const rawPreviewChars = Number.parseInt(String(process.env.OPENPRISM_TRANSFER_DEBUG_PREVIEW_CHARS || '400'), 10); + const previewChars = Number.isFinite(rawPreviewChars) + ? Math.max(60, Math.min(rawPreviewChars, 4000)) + : 400; + return { level, allowFull, previewChars }; +} + +function normalizeTextContent(content) { + if (content === undefined || content === null) return ''; + if (typeof content === 'string') return content; + if (Array.isArray(content)) { + return content + .map(part => { + if (typeof part === 'string') return part; + try { + return JSON.stringify(part); + } catch { + return String(part); + } + }) + .join('\n'); + } + if (typeof content === 'object') { + try { + return JSON.stringify(content); + } catch { + return String(content); + } + } + return String(content); +} + +function flattenMessages(messages) { + if (!Array.isArray(messages)) return ''; + return messages + .map((m) => { + if (!m || typeof m !== 'object') return String(m || ''); + const role = m.role || m._getType?.() || m.type || 'message'; + const content = normalizeTextContent(m.content); + return `[${role}]\n${content}`; + }) + .join('\n\n'); +} + +function sha256(text) { + return crypto.createHash('sha256').update(text).digest('hex'); +} + +function toSingleLine(text) { + return String(text || '').replace(/\s+/g, ' ').trim(); +} + +function previewPair(text, previewChars) { + if (!text) return { head: '', tail: '' }; + if (text.length <= previewChars * 2) { + return { head: text, tail: '' }; + } + return { + head: text.slice(0, previewChars), + tail: text.slice(-previewChars), + }; +} + +async function appendLlmDebugRecord(state, record) { + if (!state?.targetProjectRoot || !state?.jobId) return; + const debugDir = path.join(state.targetProjectRoot, '.agent_runs', state.jobId); + await ensureDir(debugDir); + const file = path.join(debugDir, 'llm_debug.jsonl'); + const line = `${JSON.stringify({ ts: new Date().toISOString(), ...record })}\n`; + await fs.appendFile(file, line, 'utf8'); +} + +export async function invokeLLMTextWithDebug({ llm, messages, state, nodeName }) { + const debug = getTransferDebugConfig(); + const requestText = flattenMessages(messages); + const promptLength = requestText.length; + const promptHash = sha256(requestText); + const promptPreview = previewPair(requestText, debug.previewChars); + + const progressLog = []; + if (debug.level !== 'off') { + progressLog.push(`[${nodeName}] LLM request: promptLen=${promptLength}, promptSha256=${promptHash}.`); + if (debug.level !== 'meta') { + progressLog.push(`[${nodeName}] prompt preview: ${toSingleLine(promptPreview.head).slice(0, 240)}`); + if (promptPreview.tail) { + progressLog.push(`[${nodeName}] prompt preview tail: ${toSingleLine(promptPreview.tail).slice(0, 240)}`); + } + } + + const requestRecord = { + type: 'llm.request', + node: nodeName, + promptLength, + promptSha256: promptHash, + promptPreviewHead: promptPreview.head, + promptPreviewTail: promptPreview.tail, + }; + if (debug.level === 'full' && debug.allowFull) { + requestRecord.promptFull = requestText; + } + await appendLlmDebugRecord(state, requestRecord); + } + + const startedAt = Date.now(); + const response = await llm.invoke(messages); + const responseText = normalizeTextContent(response.content); + const durationMs = Date.now() - startedAt; + const responseLength = responseText.length; + const responseHash = sha256(responseText); + const responsePreview = previewPair(responseText, debug.previewChars); + + if (debug.level !== 'off') { + progressLog.push(`[${nodeName}] LLM response: durationMs=${durationMs}, responseLen=${responseLength}, responseSha256=${responseHash}.`); + if (debug.level !== 'meta') { + progressLog.push(`[${nodeName}] response preview: ${toSingleLine(responsePreview.head).slice(0, 240)}`); + if (responsePreview.tail) { + progressLog.push(`[${nodeName}] response preview tail: ${toSingleLine(responsePreview.tail).slice(0, 240)}`); + } + } + + const responseRecord = { + type: 'llm.response', + node: nodeName, + durationMs, + responseLength, + responseSha256: responseHash, + responsePreviewHead: responsePreview.head, + responsePreviewTail: responsePreview.tail, + }; + if (debug.level === 'full' && debug.allowFull) { + responseRecord.responseFull = responseText; + } + await appendLlmDebugRecord(state, responseRecord); + } + + return { text: responseText, progressLog }; +} diff --git a/apps/frontend/src/api/client.ts b/apps/frontend/src/api/client.ts index 8e77535..37ca564 100644 --- a/apps/frontend/src/api/client.ts +++ b/apps/frontend/src/api/client.ts @@ -313,6 +313,10 @@ export function listTemplates() { return request<{ templates: TemplateMeta[]; categories?: TemplateCategory[] }>('/api/templates'); } +export function listTemplateFiles(templateId: string) { + return request<{ files: string[] }>(`/api/templates/${encodeURIComponent(templateId)}/files`); +} + export async function uploadTemplate(templateId: string, templateLabel: string, file: File) { const form = new FormData(); form.append('templateId', templateId); @@ -480,6 +484,11 @@ export interface TransferStepResult { status: string; progressLog: string[]; error?: string; + currentNode?: string | null; + startedAt?: string | null; + updatedAt?: string | null; + finishedAt?: string | null; + transferMode?: 'legacy' | 'mineru'; } export interface PageImage { diff --git a/apps/frontend/src/app/App.css b/apps/frontend/src/app/App.css index ec09d64..0f16ab6 100644 --- a/apps/frontend/src/app/App.css +++ b/apps/frontend/src/app/App.css @@ -3695,6 +3695,13 @@ textarea.input { display: flex; flex-direction: column; overflow: hidden; + animation: transfer-widget-enter 180ms ease-out; + transition: transform 180ms ease, box-shadow 180ms ease; +} + +.transfer-widget:hover { + transform: translateY(-2px); + box-shadow: 0 10px 28px rgba(41, 27, 17, 0.16); } .transfer-widget-header { @@ -3731,7 +3738,7 @@ textarea.input { font-size: 11px; font-family: 'JetBrains Mono', monospace; background: rgba(120, 98, 83, 0.06); - max-height: 220px; + max-height: 150px; overflow-y: auto; line-height: 1.5; } @@ -3739,3 +3746,20 @@ textarea.input { .transfer-widget-log > div { margin-bottom: 2px; } + +.transfer-log-line { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +@keyframes transfer-widget-enter { + from { + opacity: 0; + transform: translateY(8px) scale(0.985); + } + to { + opacity: 1; + transform: translateY(0) scale(1); + } +} diff --git a/apps/frontend/src/app/ProjectPage.tsx b/apps/frontend/src/app/ProjectPage.tsx index acb88ab..7c4f91c 100644 --- a/apps/frontend/src/app/ProjectPage.tsx +++ b/apps/frontend/src/app/ProjectPage.tsx @@ -14,13 +14,18 @@ import { trashProject, updateProjectTags, permanentDeleteProject, - uploadTemplate + uploadTemplate, + transferStatus, } from '../api/client'; import type { ProjectMeta, TemplateMeta, TemplateCategory } from '../api/client'; import TransferPanel from './TransferPanel'; type ViewFilter = 'all' | 'mine' | 'archived' | 'trash'; type SortBy = 'updatedAt' | 'name' | 'createdAt'; +const TERMINAL_TRANSFER_STATUS = new Set(['success', 'failed', 'error']); +const TRANSFER_POLL_MS = 1000; +const MAX_WIDGET_LOG_LINES = 80; +const MAX_WIDGET_LOG_LINE_CHARS = 180; const SETTINGS_KEY = 'openprism-settings-v1'; @@ -117,20 +122,103 @@ export default function ProjectPage() { // Active transfer job (persists after modal close) const [activeJob, setActiveJob] = useState<{ - jobId: string; status: string; progressLog: string[]; error?: string; + jobId: string; + status: string; + progressLog: string[]; + error?: string | null; + currentNode?: string | null; + startedAt?: string | null; + updatedAt?: string | null; + finishedAt?: string | null; sourceName?: string; + sourceId?: string; } | null>(null); const [jobWidgetOpen, setJobWidgetOpen] = useState(true); + const jobPollTimerRef = useRef(null); + const monitoredJobIdRef = useRef(null); // Template upload state const templateZipRef = useRef(null); const [uploadingTemplate, setUploadingTemplate] = useState(false); + const formatWidgetLogLine = (line: string) => { + const text = String(line || ''); + if (text.length <= MAX_WIDGET_LOG_LINE_CHARS) return text; + return `${text.slice(0, MAX_WIDGET_LOG_LINE_CHARS)}...`; + }; const loadProjects = useCallback(async () => { const res = await listProjects(); setProjects(res.projects || []); }, []); + const stopTransferMonitor = useCallback(() => { + monitoredJobIdRef.current = null; + if (jobPollTimerRef.current !== null) { + window.clearTimeout(jobPollTimerRef.current); + jobPollTimerRef.current = null; + } + }, []); + + const pollTransferJob = useCallback(async (jobId: string) => { + if (monitoredJobIdRef.current !== jobId) return; + try { + const res = await transferStatus(jobId); + if (monitoredJobIdRef.current !== jobId) return; + + setActiveJob((prev) => { + if (!prev || prev.jobId !== jobId) return prev; + return { + ...prev, + status: res.status, + progressLog: res.progressLog || [], + error: res.error || null, + currentNode: res.currentNode || null, + startedAt: res.startedAt || null, + updatedAt: res.updatedAt || null, + finishedAt: res.finishedAt || null, + }; + }); + + if (TERMINAL_TRANSFER_STATUS.has(res.status)) { + stopTransferMonitor(); + if (res.status === 'success') { + loadProjects().catch(() => {}); + } + return; + } + + jobPollTimerRef.current = window.setTimeout(() => { + pollTransferJob(jobId).catch(() => {}); + }, TRANSFER_POLL_MS); + } catch (err) { + setActiveJob((prev) => { + if (!prev || prev.jobId !== jobId) return prev; + return { + ...prev, + status: 'error', + error: String(err), + }; + }); + stopTransferMonitor(); + } + }, [loadProjects, stopTransferMonitor]); + + const startTransferMonitor = useCallback((job: { + jobId: string; + status: string; + progressLog: string[]; + error?: string | null; + currentNode?: string | null; + sourceName?: string; + sourceId?: string; + }) => { + stopTransferMonitor(); + monitoredJobIdRef.current = job.jobId; + setActiveJob(job); + setJobWidgetOpen(true); + pollTransferJob(job.jobId).catch(() => {}); + }, [pollTransferJob, stopTransferMonitor]); + useEffect(() => { loadProjects().catch((err) => setStatus(t('加载项目失败: {{error}}', { error: String(err) }))); }, [loadProjects, t]); @@ -147,6 +235,8 @@ export default function ProjectPage() { .catch((err) => setStatus(t('模板加载失败: {{error}}', { error: String(err) }))); }, [createTemplate, t]); + useEffect(() => () => stopTransferMonitor(), [stopTransferMonitor]); + const allTags = useMemo(() => { const s = new Set(); projects.forEach((p) => (p.tags || []).forEach((tag) => s.add(tag))); @@ -327,6 +417,13 @@ export default function ProjectPage() { } }; + const minimizeTransferModal = () => { + setTransferOpen(false); + if (activeJob && !TERMINAL_TRANSFER_STATUS.has(activeJob.status)) { + setJobWidgetOpen(true); + } + }; + const handleCopy = async (id: string, originalName: string) => { try { const res = await copyProject(id, `${originalName} (Copy)`); @@ -891,19 +988,22 @@ export default function ProjectPage() { {/* Transfer Modal */} {transferOpen && transferSource && ( -
setTransferOpen(false)}> +
event.stopPropagation()}>
{t('模板转换')} — {transferSource.name}
- +
{ - setActiveJob({ ...job, sourceName: transferSource.name }); - setJobWidgetOpen(true); - if (job.status === 'success') loadProjects(); + jobState={activeJob && activeJob.sourceId === transferSource.id ? activeJob : null} + onJobStart={(job) => { + startTransferMonitor({ + ...job, + sourceName: transferSource.name, + sourceId: transferSource.id, + }); }} />
@@ -918,22 +1018,31 @@ export default function ProjectPage() { {t('模板转换')} — {activeJob.sourceName || ''}
{t('状态')}: {activeJob.status} + {activeJob.currentNode && ( + + ({t('节点')}: {activeJob.currentNode}) + + )}
{activeJob.error && (
{activeJob.error}
)} {activeJob.progressLog.length > 0 && (
- {activeJob.progressLog.map((line, i) => ( -
{line}
+ {activeJob.progressLog.slice(-MAX_WIDGET_LOG_LINES).map((line, i) => ( +
+ {formatWidgetLogLine(line)} +
))}
)} diff --git a/apps/frontend/src/app/TransferPanel.tsx b/apps/frontend/src/app/TransferPanel.tsx index 9144590..2633f51 100644 --- a/apps/frontend/src/app/TransferPanel.tsx +++ b/apps/frontend/src/app/TransferPanel.tsx @@ -2,30 +2,40 @@ import { useState, useCallback, useEffect, useRef } from 'react'; import { useTranslation } from 'react-i18next'; import { transferStart, - transferStep, - transferSubmitImages, mineruTransferStart, mineruTransferUploadPdf, listTemplates, + listTemplateFiles, getProjectTree, } from '../api/client'; import type { LLMConfig, TemplateMeta, - FileItem, } from '../api/client'; +interface TransferPanelJobState { + jobId: string; + status: string; + progressLog: string[]; + error?: string | null; + currentNode?: string | null; +} + interface TransferPanelProps { projectId: string; - onJobUpdate?: (job: { jobId: string; status: string; progressLog: string[]; error?: string }) => void; + jobState?: TransferPanelJobState | null; + onJobStart?: (job: TransferPanelJobState) => void; } type TransferMode = 'legacy' | 'mineru'; type MineruSource = 'project' | 'upload'; const ENGINES = ['pdflatex', 'xelatex', 'lualatex', 'latexmk'] as const; +const TERMINAL_STATUS = new Set(['success', 'failed', 'error']); +const MAX_VISIBLE_LOG_LINES = 80; +const MAX_LOG_LINE_CHARS = 180; -export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelProps) { +export default function TransferPanel({ projectId, jobState, onJobStart }: TransferPanelProps) { const { t } = useTranslation(); // Transfer mode @@ -40,8 +50,9 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP // Target selection const [targetTemplateId, setTargetTemplateId] = useState(''); + const [targetMainFile, setTargetMainFile] = useState(''); + const [targetMainFiles, setTargetMainFiles] = useState([]); const [engine, setEngine] = useState('pdflatex'); - const [layoutCheck, setLayoutCheck] = useState(false); // LLM config — read from shared localStorage (set via ProjectPage / EditorPage settings) const SETTINGS_KEY = 'openprism-settings-v1'; @@ -82,6 +93,7 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP // Dropdown open states const [templateDropdownOpen, setTemplateDropdownOpen] = useState(false); + const [targetMainDropdownOpen, setTargetMainDropdownOpen] = useState(false); const [engineDropdownOpen, setEngineDropdownOpen] = useState(false); const [modeDropdownOpen, setModeDropdownOpen] = useState(false); @@ -99,6 +111,7 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP // Refs for click-outside const sourceRef = useRef(null); const templateRef = useRef(null); + const targetMainRef = useRef(null); const engineRef = useRef(null); const modeRef = useRef(null); const pdfInputRef = useRef(null); @@ -131,11 +144,78 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP } }, [templatesLoaded]); + // Sync job state from parent monitor + useEffect(() => { + if (!jobState) return; + if (jobId && jobState.jobId !== jobId) return; + + if (!jobId) setJobId(jobState.jobId); + setStatus(jobState.status || 'idle'); + setProgressLog(jobState.progressLog || []); + setError(jobState.error || ''); + + if (TERMINAL_STATUS.has(jobState.status)) { + setRunning(false); + } + }, [jobState, jobId]); + + const selectedTemplate = templates.find(tp => tp.id === targetTemplateId); + const selectedTemplateName = selectedTemplate?.label || ''; + const selectedTargetMainFile = targetMainFile || selectedTemplate?.mainFile || ''; + + const activeJobState = jobState && (!jobId || jobState.jobId === jobId) ? jobState : null; + const viewStatus = activeJobState?.status || status; + const viewLog = (activeJobState?.progressLog || progressLog).slice(-MAX_VISIBLE_LOG_LINES); + const viewError = activeJobState?.error || error; + const viewCurrentNode = activeJobState?.currentNode || null; + const formatLogLine = (line: string) => { + const text = String(line || ''); + if (text.length <= MAX_LOG_LINE_CHARS) return text; + return `${text.slice(0, MAX_LOG_LINE_CHARS)}...`; + }; + + // Load target template tex files and pick a default target main file + useEffect(() => { + if (!targetTemplateId) { + setTargetMainFiles([]); + setTargetMainFile(''); + return; + } + + const manifestDefault = templates.find(tp => tp.id === targetTemplateId)?.mainFile || ''; + let cancelled = false; + listTemplateFiles(targetTemplateId) + .then((res) => { + if (cancelled) return; + const files = (res.files || []).filter(f => f.toLowerCase().endsWith('.tex')); + setTargetMainFiles(files); + + if (files.includes(manifestDefault)) { + setTargetMainFile(manifestDefault); + return; + } + if (files.length > 0) { + setTargetMainFile(files[0]); + return; + } + setTargetMainFile(manifestDefault || 'main.tex'); + }) + .catch(() => { + if (cancelled) return; + const fallback = manifestDefault || 'main.tex'; + setTargetMainFiles([]); + setTargetMainFile(fallback); + }); + + return () => { cancelled = true; }; + }, [targetTemplateId, templates]); + // Click outside to close dropdowns useEffect(() => { const handler = (e: MouseEvent) => { if (sourceRef.current && !sourceRef.current.contains(e.target as Node)) setSourceDropdownOpen(false); if (templateRef.current && !templateRef.current.contains(e.target as Node)) setTemplateDropdownOpen(false); + if (targetMainRef.current && !targetMainRef.current.contains(e.target as Node)) setTargetMainDropdownOpen(false); if (engineRef.current && !engineRef.current.contains(e.target as Node)) setEngineDropdownOpen(false); if (modeRef.current && !modeRef.current.contains(e.target as Node)) setModeDropdownOpen(false); }; @@ -143,9 +223,6 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP return () => document.removeEventListener('mousedown', handler); }, []); - const selectedTemplateName = templates.find(tp => tp.id === targetTemplateId)?.label || ''; - const selectedTemplate = templates.find(tp => tp.id === targetTemplateId); - const buildLlmConfig = (): Partial | undefined => { const { llmEndpoint, llmApiKey, llmModel } = readLLMFromStorage(); if (!llmEndpoint && !llmApiKey && !llmModel) return undefined; @@ -158,7 +235,7 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP const handleStart = useCallback(async () => { if (!targetTemplateId) return; - const targetMainFile = selectedTemplate?.mainFile || 'main.tex'; + if (!selectedTargetMainFile) return; setError(''); setProgressLog([]); setRunning(true); @@ -166,7 +243,6 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP try { if (transferMode === 'mineru') { - // MinerU mode — persist config to localStorage saveMineruConfigToStorage(mineruApiBase, mineruToken); const mineruConfig = (mineruApiBase || mineruToken) ? { @@ -179,69 +255,67 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP sourceProjectId: mineruSource === 'project' ? projectId : undefined, sourceMainFile: mineruSource === 'project' ? sourceMainFile : undefined, targetTemplateId, - targetMainFile, + targetMainFile: selectedTargetMainFile, engine, - layoutCheck, llmConfig: buildLlmConfig(), mineruConfig, }); setJobId(res.jobId); + onJobStart?.({ + jobId: res.jobId, + status: mineruSource === 'upload' ? 'waiting_upload' : 'running', + progressLog: [], + error: null, + currentNode: null, + }); - // If uploading PDF, upload it before running graph if (mineruSource === 'upload' && uploadedPdf) { setStatus('uploading_pdf'); await mineruTransferUploadPdf(res.jobId, uploadedPdf); } - setStatus('started'); - await runGraph(res.jobId); + setRunning(false); + setStatus('running'); } else { - // Legacy mode if (!sourceMainFile) return; const res = await transferStart({ sourceProjectId: projectId, sourceMainFile, targetTemplateId, - targetMainFile, + targetMainFile: selectedTargetMainFile, engine, - layoutCheck, llmConfig: buildLlmConfig(), }); + setJobId(res.jobId); - setStatus('started'); - await runGraph(res.jobId); + setRunning(false); + setStatus('running'); + onJobStart?.({ + jobId: res.jobId, + status: 'running', + progressLog: [], + error: null, + currentNode: null, + }); } } catch (err: any) { setError(err.message || 'Failed to start transfer'); setRunning(false); setStatus('error'); } - }, [transferMode, mineruSource, uploadedPdf, targetTemplateId, sourceMainFile, projectId, engine, layoutCheck, selectedTemplate, mineruApiBase, mineruToken]); - - const runGraph = useCallback(async (jid: string) => { - // eslint-disable-next-line no-constant-condition - while (true) { - try { - const res = await transferStep(jid); - setProgressLog(res.progressLog || []); - setStatus(res.status); - onJobUpdate?.({ jobId: jid, status: res.status, progressLog: res.progressLog || [], error: res.error }); - - if (res.status === 'waiting_images') { setRunning(false); return; } - if (res.status === 'success' || res.status === 'failed') { setRunning(false); return; } - if (res.error) { setError(res.error); setRunning(false); return; } - - // Brief pause before next poll - await new Promise(r => setTimeout(r, 1000)); - } catch (err: any) { - setError(err.message || 'Step failed'); - setRunning(false); - setStatus('error'); - onJobUpdate?.({ jobId: jid, status: 'error', progressLog: [], error: err.message }); - return; - } - } - }, [onJobUpdate]); + }, [ + transferMode, + mineruSource, + uploadedPdf, + targetTemplateId, + sourceMainFile, + projectId, + engine, + selectedTargetMainFile, + mineruApiBase, + mineruToken, + onJobStart, + ]); const chevronSvg = (open: boolean) => ( @@ -256,9 +330,10 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP ); const modeLabel = transferMode === 'mineru' ? 'MinerU (PDF→MD→LaTeX)' : t('经典模式 (LaTeX→LaTeX)'); + const activeTransfer = viewStatus && !TERMINAL_STATUS.has(viewStatus) && viewStatus !== 'idle'; const canStart = (() => { - if (running || !targetTemplateId) return false; + if (running || activeTransfer || !targetTemplateId || !selectedTargetMainFile) return false; if (transferMode === 'legacy') return !!sourceMainFile; if (transferMode === 'mineru') { if (mineruSource === 'project') return !!sourceMainFile; @@ -297,6 +372,7 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP )}
+ {/* MinerU mode: source selection (project or upload) */} {transferMode === 'mineru' && (
@@ -390,7 +466,11 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP
{ setTargetTemplateId(tmpl.id); setTemplateDropdownOpen(false); }} + onClick={() => { + setTargetTemplateId(tmpl.id); + setTemplateDropdownOpen(false); + setTargetMainDropdownOpen(false); + }} > {tmpl.label} {targetTemplateId === tmpl.id && checkSvg} @@ -406,6 +486,40 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP
+ {/* Target main file selection */} +
+ +
+ + {targetMainDropdownOpen && ( +
+ {targetMainFiles.map(file => ( +
{ setTargetMainFile(file); setTargetMainDropdownOpen(false); }} + > + {file} + {selectedTargetMainFile === file && checkSvg} +
+ ))} + {targetMainFiles.length === 0 && ( +
+ {t('未找到 .tex 文件')} +
+ )} +
+ )} +
+
+ {/* Engine selection */}
@@ -431,11 +545,9 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP
- {/* Layout check toggle */} - +
+ {t('排版检查 (VLM) 暂未开放;当前转换仅执行内容迁移、资源复制和编译修复。')} +
{/* MinerU API config — shown only in MinerU mode */} {transferMode === 'mineru' && ( @@ -469,8 +581,6 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP )} - {/* LLM Config — managed in header settings */} - {/* Start button */} {/* Status */} - {status !== 'idle' && ( + {viewStatus !== 'idle' && (
- {t('状态')}: {status} + {t('状态')}: {viewStatus} + {viewCurrentNode && ( + + ({t('节点')}: {viewCurrentNode}) + + )}
)} {/* Error */} - {error && ( -
{error}
+ {viewError && ( +
{viewError}
)} {/* Progress log */} - {progressLog.length > 0 && ( + {viewLog.length > 0 && (
- {progressLog.map((line, i) => ( -
{line}
+ {viewLog.map((line, i) => ( +
+ {formatLogLine(line)} +
))}
)} diff --git a/templates/manifest.json b/templates/manifest.json index c673ddc..f9fc46a 100644 --- a/templates/manifest.json +++ b/templates/manifest.json @@ -3,7 +3,7 @@ { "id": "acl", "label": "ACL", - "mainFile": "main.tex", + "mainFile": "acl_latex.tex", "category": "academic", "description": "ACL 会议论文模板,适用于自然语言处理领域", "descriptionEn": "ACL conference paper template for NLP research",