Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
3ff8869
Update deploy.yml
PaulPextra Jun 19, 2025
2bf0fb8
Update deploy.yml
PaulPextra Jun 19, 2025
1dba68e
adding logs
PaulPextra Jun 20, 2025
82e1aec
Merge pull request #1 from PaulPextra/paul/chatgpt
PaulPextra Jun 20, 2025
3148fc6
remove logs
PaulPextra Jun 20, 2025
c20cca4
Merge pull request #2 from PaulPextra/paul/chatgpt
PaulPextra Jun 20, 2025
446966f
Merge branch 'TechX-Resources:main' into main
PaulPextra Jun 25, 2025
2f1f585
scrape chatgpt
PaulPextra Jul 4, 2025
55d8c42
scrape chatgpt
PaulPextra Jul 4, 2025
13e0b45
scrape chatgpt update
PaulPextra Jul 4, 2025
49ac953
scrape chatgpt
PaulPextra Jul 4, 2025
a120e1d
update: scrape conversation
PaulPextra Jul 4, 2025
8ef59a0
update: scrape model conversation
PaulPextra Jul 4, 2025
06a088a
fix: model-conversation
PaulPextra Jul 4, 2025
31a4229
update
PaulPextra Jul 4, 2025
c06ffb1
update
PaulPextra Jul 4, 2025
674992a
update
PaulPextra Jul 4, 2025
254d70e
update
PaulPextra Jul 4, 2025
f424e9f
update
PaulPextra Jul 5, 2025
30cedcf
update
PaulPextra Jul 5, 2025
c853851
update
PaulPextra Jul 5, 2025
921325c
update
PaulPextra Jul 7, 2025
9314ec5
update
PaulPextra Jul 7, 2025
645f8bd
update: parsechatgpt
PaulPextra Jul 8, 2025
8870229
Scrape model conversation
PaulPextra Jul 8, 2025
d5326d9
quick-fix: model-conversation
PaulPextra Jul 9, 2025
c6c5b85
fix:chatgpt-model
PaulPextra Jul 9, 2025
e983fce
fix: chatgpt-model-conversation
PaulPextra Jul 9, 2025
13a931f
update: chatgpt conversation
PaulPextra Jul 9, 2025
f12444f
update: scrape chatgpt conversation
PaulPextra Jul 9, 2025
b42fe53
update: chatgpt conversation
PaulPextra Jul 9, 2025
bed8e66
fix: chatgpt conversation
PaulPextra Jul 9, 2025
201d304
update: chatgpt
PaulPextra Jul 10, 2025
8da5b49
update: puppeteer
PaulPextra Jul 10, 2025
8441e04
update: chatgpt
PaulPextra Jul 10, 2025
1921dd1
fix: chatgpt conversation
PaulPextra Jul 11, 2025
b019a8e
Update: chatgpt
PaulPextra Jul 15, 2025
70b22cc
update:chatgpt
PaulPextra Jul 15, 2025
695f221
update: chatgpt-model
PaulPextra Jul 15, 2025
29a3089
update:chatgpt-model
PaulPextra Jul 16, 2025
47a99a3
update:chatgpt
PaulPextra Jul 16, 2025
0c899ce
fix:chatgpt-model
PaulPextra Jul 17, 2025
e3b3419
fix:chatgpt-model
PaulPextra Jul 17, 2025
e791469
update:chatgpt-model
PaulPextra Jul 17, 2025
bf7a53a
fix:chatgpt-model
PaulPextra Jul 17, 2025
24375bf
fix:api-route
PaulPextra Jul 18, 2025
cc58ce0
debug error
PaulPextra Jul 30, 2025
591574d
update: route.ts
PaulPextra Aug 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions app/api/conversation/[id]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,14 @@ export async function GET(
{ params }: { params: Promise<{ id: string }> }
): Promise<NextResponse> {
try {
// console.log(params);
await ensureInitialized();
const id = (await params).id;

// Get conversation record from database
const record = await getConversationRecord(id);
const signedUrl = await s3Client.getSignedReadUrl(record.contentKey);
console.log("Error Debuging", id, record, signedUrl);

// Get conversation content from S3
const content = await s3Client.getConversationContent(record.contentKey);

return NextResponse.json({
conversation: record,
content: content,
});
return NextResponse.json({ url: signedUrl });
} catch (error) {
console.error('Error retrieving conversation:', error);

Expand Down
19 changes: 16 additions & 3 deletions app/api/conversation/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,22 @@ export async function POST(req: NextRequest) {
return NextResponse.json({ error: '`htmlDoc` must be a file field' }, { status: 400 });
}

// Parse the conversation from HTML
// Parse the conversation from HTML if skipScraping is false, otherwise skip parsing
const skipScraping = formData.has('skipScraping');
let conversation;
const html = await file.text();
const conversation = await parseHtmlToConversation(html, model);
if (!skipScraping) {
conversation = await parseHtmlToConversation(html, model);
} else {
// Remove CSS rule from the HTML string
const cleanedHtml = html.replace(/body\s*\{[^}]*\}/gm, '');
conversation = {
model: model,
content: cleanedHtml,
scrapedAt: new Date().toISOString(),
sourceHtmlBytes: cleanedHtml.length,
};
}

// Generate a unique ID for the conversation
const conversationId = randomUUID();
Expand Down Expand Up @@ -162,4 +175,4 @@ export async function GET(req: NextRequest) {
console.error('Error retrieving conversations:', err);
return NextResponse.json({ error: 'Internal error, see logs' }, { status: 500 });
}
}
}
32 changes: 29 additions & 3 deletions lib/parsers/chatgpt.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,39 @@
import type { Conversation } from '@/types/conversation';
import { JSDOM } from 'jsdom';

/**
 * Extracts the conversation blocks of a ChatGPT share page into a Conversation.
 *
 * The parsed page's `<head>` markup is copied verbatim, so linked styles are
 * left untouched, and only the matched conversation blocks are placed in the
 * rebuilt `<body>`.
 *
 * @param html Raw HTML string
 * @returns A Conversation whose `content` is the rebuilt HTML document and
 *   whose `sourceHtmlBytes` is the byte length of that rebuilt document.
 * @throws Error if no element matches the conversation-block selector.
 */
export async function parseChatGPT(html: string): Promise<Conversation> {
  const dom = new JSDOM(html);
  const document = dom.window.document;

  // Grab only the core conversation blocks. The colon is escaped so that
  // `dark:prose-invert` is matched as a class name, not read as a pseudo-class.
  const blocks = Array.from(
    document.querySelectorAll('div.markdown.prose.dark\\:prose-invert.w-full.break-words')
  );

  if (blocks.length === 0) {
    throw new Error('Conversation content not found');
  }

  // Leave all <link> styles untouched and embed conversation only
  const htmlContent = `
<html>
<head>
${document.head.innerHTML}
</head>
<body class="dark">
${blocks.map((b) => b.outerHTML).join('\n')}
</body>
</html>
`;

  // NOTE(review): despite its name, `sourceHtmlBytes` measures the rebuilt
  // document, not the raw `html` argument — confirm this is what consumers expect.
  return {
    model: 'ChatGPT',
    content: htmlContent,
    scrapedAt: new Date().toISOString(),
    sourceHtmlBytes: Buffer.byteLength(htmlContent),
  };
}
Loading