// Smart-Study-Scheduler/debug-pdf-processing.js (main branch, Maranathaodai/Smart-Study-Scheduler)

const axios = require('axios');
require('dotenv').config();

/**
 * Scans a model reply for the marker strings this debug script cares about.
 *
 * @param {string} text - Assistant message content returned by the API.
 * @returns {{containsSystemPrompt: boolean, containsInstructions: boolean, containsContent: boolean}}
 *   `containsSystemPrompt` – reply contains one of the system prompt's section headings;
 *   `containsInstructions` – reply contains a refusal / "send me the content" phrase (case-insensitive);
 *   `containsContent` – reply contains both a `#` and the word `React`.
 */
function analyzePDFResponse(text) {
  const lowered = text.toLowerCase();
  const promptMarkers = ['EXTRACTION REQUIREMENTS', 'OUTPUT FORMAT', 'CRITICAL:'];
  const refusalMarkers = ['please provide', 'i cannot', 'unable to process'];

  return {
    containsSystemPrompt: promptMarkers.some((marker) => text.includes(marker)),
    containsInstructions: refusalMarkers.some((marker) => lowered.includes(marker)),
    containsContent: text.includes('#') && text.includes('React'),
  };
}

/**
 * Debug helper: sends a base64 payload labelled `application/pdf` to the
 * OpenRouter chat-completions endpoint and reports how the model reacted.
 *
 * Reads the API key from `process.env.EXPO_PUBLIC_OPENROUTER_API_KEY`; when it
 * is missing the function logs a message and returns without calling the API.
 * Errors raised inside the `try` block (network failure, timeout, non-2xx
 * status) are logged rather than rethrown.
 *
 * @returns {Promise<void>} Resolves once the diagnosis has been logged.
 */
async function debugPDFProcessing() {
  // Upper bound for the HTTP call so a stalled connection cannot hang the script.
  const REQUEST_TIMEOUT_MS = 60000;

  try {
    console.log('🔍 DEBUGGING: PDF Base64 Processing...\n');

    const OPENROUTER_API_KEY = process.env.EXPO_PUBLIC_OPENROUTER_API_KEY;
    if (!OPENROUTER_API_KEY) {
      console.log('❌ API key not found');
      return;
    }

    // NOTE: this is markdown text encoded as base64, not the bytes of an actual
    // PDF file. It only simulates the shape of the payload that gets sent.
    const pdfContent = `# React Native Tutorial

## Introduction
React Native allows you to build mobile apps using React.

### Key Features
- Cross-platform development
- Native performance
- Hot reloading`;

    const base64Content = Buffer.from(pdfContent).toString('base64');

    console.log('📤 Testing PDF base64 processing...');
    console.log('Base64 length:', base64Content.length);

    // Test the exact same call that our app makes
    const response = await axios.post(
      'https://openrouter.ai/api/v1/chat/completions',
      {
        model: 'anthropic/claude-3-haiku:beta',
        messages: [
          {
            role: 'system',
            content: `You are an expert at extracting and structuring educational content from PDF documents. Create high-quality study material with:

EXTRACTION REQUIREMENTS:
- Extract ALL readable text content from the document
- Preserve document structure (headers, sections, subsections)
- Maintain lists, bullet points, and numbered items

OUTPUT FORMAT:
Structure as professional study material using markdown.

CRITICAL: Return ONLY the extracted and structured content, no meta-commentary about the extraction process.`
          },
          {
            role: 'user',
            content: [
              {
                type: 'text',
                text: `Please extract and structure all educational content from this PDF document.`
              },
              {
                type: 'image_url',
                image_url: {
                  // Only the first 1000 base64 characters are sent.
                  url: `data:application/pdf;base64,${base64Content.substring(0, 1000)}`
                }
              }
            ]
          }
        ],
        max_tokens: 1000,
        temperature: 0.3
      },
      {
        headers: {
          'Authorization': `Bearer ${OPENROUTER_API_KEY}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': 'https://smart-study-scheduler.vercel.app/',
          'X-Title': 'Smart Study Scheduler'
        },
        timeout: REQUEST_TIMEOUT_MS
      }
    );

    // Guard the dereference chain: a body without `choices` (e.g. an error
    // object) or with null content would otherwise surface as an opaque
    // TypeError instead of showing what the API actually returned.
    const actualResponse = response.data?.choices?.[0]?.message?.content;
    if (typeof actualResponse !== 'string') {
      console.error('❌ Unexpected API response shape:', JSON.stringify(response.data));
      return;
    }

    console.log('\n📥 ACTUAL PDF PROCESSING RESPONSE:');
    console.log('='.repeat(60));
    console.log(actualResponse);
    console.log('='.repeat(60));

    // Detailed analysis
    const { containsSystemPrompt, containsInstructions, containsContent } =
      analyzePDFResponse(actualResponse);

    console.log('\n🔍 DETAILED ANALYSIS:');
    console.log(`❌ Contains system prompt: ${containsSystemPrompt}`);
    console.log(`❌ Contains instructions: ${containsInstructions}`);
    console.log(`✅ Contains actual content: ${containsContent}`);

    // Checked in priority order: prompt echo, then refusal, then success.
    if (containsSystemPrompt) {
      console.log('\n🚨 ISSUE FOUND: AI is echoing the system prompt!');
      console.log('💡 FIX NEEDED: Simplify the system prompt');
    } else if (containsInstructions) {
      console.log('\n🚨 ISSUE FOUND: AI cannot process PDF base64!');
      console.log('💡 FIX NEEDED: Use text-only processing');
    } else if (containsContent) {
      console.log('\n✅ SUCCESS: PDF processing working correctly');
    } else {
      // Previously this case printed nothing, leaving the run without a verdict.
      console.log('\n⚠️ INCONCLUSIVE: response matched none of the known patterns');
    }

  } catch (error) {
    // Prefer the server's error body when axios attached one.
    console.error('❌ PDF debug failed:', error.response?.data || error.message);
  }
}

// Script entry point: start the debug run. The returned promise is
// intentionally not awaited.
void debugPDFProcessing();