"""PDF to Text Converter.

Converts every PDF file in the folder containing this script into a plain
text file using PyPDF2. Each PDF is processed page by page, and the extracted
text is saved as a ``.txt`` file with the same base name as the PDF.

Features:
    - Automatically processes all PDFs in the folder.
    - Skips PDFs with no extractable text.
    - Handles errors per file and prints a summary.
    - Creates ``.txt`` files named after the original PDF.

Requirements:
    - Python 3.7 or higher
    - PyPDF2 library (``pip install PyPDF2``)
"""

import os
import sys

try:
    import PyPDF2
except ImportError:
    # Keep the module importable; main() reports the missing dependency with
    # an install hint instead of a bare traceback.
    PyPDF2 = None


def find_pdfs(folder_path):
    """Return the names of the PDF files directly inside *folder_path*.

    The ``.pdf`` extension is matched case-insensitively. Directories whose
    names end in ``.pdf`` are ignored, and the names are sorted so files are
    processed in a predictable order.
    """
    return sorted(
        name
        for name in os.listdir(folder_path)
        if name.lower().endswith('.pdf')
        and os.path.isfile(os.path.join(folder_path, name))
    )


def extract_text(pdf_file_path):
    """Return the text extracted from every page of the PDF at *pdf_file_path*.

    Pages for which PyPDF2 returns no text are left out; the text of each
    remaining page is followed by a newline.

    Raises:
        RuntimeError: If PyPDF2 is not installed.
        Exception: Whatever ``open`` or PyPDF2 raises for an unreadable file.
    """
    if PyPDF2 is None:
        raise RuntimeError("PyPDF2 is not installed (pip install PyPDF2)")

    with open(pdf_file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Extract while the file is still open.
        page_texts = (page.extract_text() for page in reader.pages)
        return ''.join(f"{page_text}\n" for page_text in page_texts if page_text)


def convert_folder(folder_path):
    """Convert every PDF in *folder_path* into a ``.txt`` file next to it.

    One status line is printed per PDF. A PDF whose extracted text is empty
    or whitespace-only is skipped and no ``.txt`` file is written for it.

    Returns:
        dict: Counts under the keys ``'total'``, ``'converted'``,
        ``'skipped'`` and ``'failed'``.
    """
    counts = {'total': 0, 'converted': 0, 'skipped': 0, 'failed': 0}

    for filename in find_pdfs(folder_path):
        counts['total'] += 1
        pdf_file_path = os.path.join(folder_path, filename)
        txt_name = f"{os.path.splitext(filename)[0]}.txt"
        txt_file_path = os.path.join(folder_path, txt_name)

        # Guard only the extraction and the write, so that a problem while
        # reporting success is never miscounted as a conversion failure.
        try:
            text = extract_text(pdf_file_path)
            has_text = bool(text.strip())
            if has_text:
                with open(txt_file_path, 'w', encoding='utf-8') as txt_file:
                    txt_file.write(text)
        except Exception as e:
            # Deliberately broad: this is a best-effort batch run and one bad
            # file must not stop the remaining conversions.
            print(f"❌ Failed to convert '{filename}': {e}")
            counts['failed'] += 1
            continue

        if has_text:
            print(f"✅ Converted '{filename}' to '{txt_name}'")
            counts['converted'] += 1
        else:
            print(f"⚠ Skipped '{filename}': no text found")
            counts['skipped'] += 1

    return counts


def main():
    """Convert the PDFs located next to this script and print a summary.

    Returns:
        int: Process exit status; 1 if PyPDF2 is missing, otherwise 0.
    """
    if PyPDF2 is None:
        print(
            "PyPDF2 is required. Install it with: pip install PyPDF2",
            file=sys.stderr,
        )
        return 1

    folder_path = os.path.dirname(os.path.abspath(__file__))
    counts = convert_folder(folder_path)

    print("\n--- Summary ---")
    print(f"Total PDFs found: {counts['total']}")
    print(f"Converted: {counts['converted']}")
    print(f"Skipped (no text): {counts['skipped']}")
    print(f"Failed: {counts['failed']}")
    print("Processing complete.")
    return 0


if __name__ == "__main__":
    sys.exit(main())