-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
149 lines (118 loc) · 5.14 KB
/
app.py
File metadata and controls
149 lines (118 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
from pdf2image import convert_from_path
from pptx import Presentation
from pptx.util import Inches
from PIL import Image, ImageDraw
import random
import tempfile
import os
import io
def remove_watermark_and_convert(pdf_file, dpi=100, progress=gr.Progress()):
"""
Gradio handler to convert PDF to PPTX with watermark removal.
"""
if pdf_file is None:
return None
# --- Step 1: Convert PDF to Images ---
progress(0, desc="Starting conversion...")
try:
# Gradio passes a temp file object. We use .name to get the path.
print(f"Processing: {pdf_file.name}")
images = convert_from_path(pdf_file.name, dpi=dpi)
except Exception as e:
raise gr.Error(f"Error reading PDF. Is Poppler installed? Details: {e}")
total_pages = len(images)
processed_images = []
# --- Step 2: Remove Watermark ---
for i, image in enumerate(images):
# Update progress bar (0% to 50%)
progress((i / total_pages) * 0.5, desc=f"Removing watermark: Page {i+1}/{total_pages}")
width, height = image.size
# Define watermark area (Bottom Right 150x35)
watermark_width = 150
watermark_height = 35
x1 = width - watermark_width
y1 = height - watermark_height
x2 = width
y2 = height
# Get reference pixels
pixel_bottom_right = image.getpixel((width - 1, height - 1))
pixel_top = image.getpixel((width - 1, y1 - 1)) if y1 > 0 else pixel_bottom_right
pixel_left = image.getpixel((x1 - 1, height - 1)) if x1 > 0 else pixel_bottom_right
draw = ImageDraw.Draw(image)
# Pixel manipulation loop (Your original logic)
for x in range(x1, x2):
for y in range(y1, y2):
choice = random.randint(0, 2)
if choice == 0:
color = pixel_bottom_right
elif choice == 1:
color = pixel_top
else:
color = pixel_left
# Add slight noise
if isinstance(color, tuple) and len(color) >= 3:
r = max(0, min(255, color[0] + random.randint(-2, 2)))
g = max(0, min(255, color[1] + random.randint(-2, 2)))
b = max(0, min(255, color[2] + random.randint(-2, 2)))
color = (r, g, b) if len(color) == 3 else (r, g, b, color[3])
draw.point((x, y), fill=color)
if image.mode != 'RGB':
image = image.convert('RGB')
processed_images.append(image)
# --- Step 3: Create PPTX ---
progress(0.5, desc="Generating PowerPoint slides...")
prs = Presentation()
prs.slide_width = Inches(10)
prs.slide_height = Inches(5.625)
for i, image in enumerate(processed_images):
# Update progress bar (50% to 100%)
progress(0.5 + ((i / total_pages) * 0.5), desc=f"Adding slide {i+1}/{total_pages}")
blank_slide_layout = prs.slide_layouts[6]
slide = prs.slides.add_slide(blank_slide_layout)
# Save image to memory buffer for PPTX insertion
img_stream = io.BytesIO()
image.save(img_stream, format='PNG')
img_stream.seek(0)
slide.shapes.add_picture(img_stream, 0, 0, width=prs.slide_width, height=prs.slide_height)
# --- Step 4: Save and Return ---
# Create a temporary file path for the output
# We use the original filename stem to name the output
original_stem = os.path.splitext(os.path.basename(pdf_file.name))[0]
# Handle weird Gradio temp names if necessary, but usually safe to just make a new temp
output_dir = tempfile.gettempdir()
output_path = os.path.join(output_dir, f"{original_stem}_converted.pptx")
prs.save(output_path)
progress(1.0, desc="Done!")
return output_path
# --- Build the Gradio UI ---
with gr.Blocks(title="PDF to PPTX Cleaner") as demo:
gr.Markdown("# 📄 PDF to PPTX Watermark Remover")
gr.Markdown("Upload a PDF to remove the bottom-right watermark and convert it to PowerPoint.")
with gr.Row():
with gr.Column():
# Input Section
input_file = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")
with gr.Accordion("Advanced Settings", open=True):
dpi_slider = gr.Slider(
minimum=72,
maximum=200,
value=100,
step=10,
label="Quality (DPI)",
info="Higher DPI = Better quality but slower processing."
)
convert_btn = gr.Button("🚀 Start Conversion", variant="primary")
with gr.Column():
# Output Section
output_file = gr.File(label="Download PowerPoint")
# Event Listener
convert_btn.click(
fn=remove_watermark_and_convert,
inputs=[input_file, dpi_slider],
outputs=output_file
)
if __name__ == "__main__":
# Support for Render/Docker Port mapping
port = int(os.environ.get("PORT", 7860))
demo.launch(server_name="0.0.0.0", server_port=port)