From 36397c050fe56035619979799c9244a64a1722cc Mon Sep 17 00:00:00 2001 From: kawsarnoor Date: Sun, 25 May 2025 18:51:08 +0100 Subject: [PATCH] dummy onbase api added --- src/dummy_onbase/Dockerfile | 24 ++++++ src/dummy_onbase/README.md | 30 +++++++ src/dummy_onbase/app.py | 112 +++++++++++++++++++++++++ src/dummy_onbase/data/.keep | 0 src/dummy_onbase/templates/upload.html | 21 +++++ 5 files changed, 187 insertions(+) create mode 100644 src/dummy_onbase/Dockerfile create mode 100644 src/dummy_onbase/README.md create mode 100644 src/dummy_onbase/app.py create mode 100644 src/dummy_onbase/data/.keep create mode 100644 src/dummy_onbase/templates/upload.html diff --git a/src/dummy_onbase/Dockerfile b/src/dummy_onbase/Dockerfile new file mode 100644 index 0000000..38cb96c --- /dev/null +++ b/src/dummy_onbase/Dockerfile @@ -0,0 +1,24 @@ +# Use an official Python image +FROM python:3.11-slim + +# Set environment vars +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +# Create app directory +WORKDIR /app + +# Copy app files +COPY . /app + +# Install dependencies +RUN pip install --no-cache-dir flask flask-cors + +# Ensure data folder exists +RUN mkdir -p /app/data + +# Expose Flask port +EXPOSE 5000 + +# Run the app +CMD ["python", "app.py"] diff --git a/src/dummy_onbase/README.md b/src/dummy_onbase/README.md new file mode 100644 index 0000000..ea7fd06 --- /dev/null +++ b/src/dummy_onbase/README.md @@ -0,0 +1,30 @@ +# Flask PDF Uploader + +A simple Flask app to upload individual PDF files or ZIP archives containing multiple PDFs. Uploaded files are stored with a timestamp and unique ID. + +## Features + +- Upload a single `.pdf` file +- Upload a `.zip` file containing multiple PDFs +- View and download uploaded PDFs +- Filter documents by date range + +## Requirements + +- Python 3.11+ (if running without Docker) +- Docker (recommended for deployment) + +--- + +## 🐳 Run with Docker + +### 1. Build the Docker image + +```bash +docker build -t pdf-uploader . 
# ---------------------------------------------------------------------------
# Patch framing that preceded app.py in this span, kept verbatim so that no
# content is lost.
#
# Tail of src/dummy_onbase/README.md:
#   ```
#
#   ### 2. Run the container
#   ```bash
#   docker run -d -p 49123:5000 -v $(pwd)/data:/app/data pdf-uploader
#   ```
#   \ No newline at end of file
#
# diff --git a/src/dummy_onbase/app.py b/src/dummy_onbase/app.py
# new file mode 100644
# index 0000000..7fe8580
# --- /dev/null
# +++ b/src/dummy_onbase/app.py
# @@ -0,0 +1,112 @@
# ---------------------------------------------------------------------------
"""Dummy OnBase API: a small Flask service to upload, list and fetch PDFs.

Files are stored in ``UPLOAD_FOLDER`` as ``<stem>_<doc_id>_<YYYYMMDD>.pdf``;
the ``/documents`` endpoint relies on that naming scheme to filter by date.
"""

from datetime import datetime
import html
import os
import shutil
import uuid
import zipfile

from flask import Flask, request, send_from_directory, render_template, jsonify
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

# Anchor the storage directory to this module rather than the working
# directory: send_from_directory() resolves relative directories against the
# application root, whereas os.listdir()/open() resolve against the CWD, so a
# bare 'data' could point at two different folders.
UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# NOTE(review): the markup inside the two success messages was lost in the
# source text (only the word "Back" survived); a line break plus a link to the
# index page is assumed here - confirm against the original commit.
_BACK_LINK = '<br><a href="/">Back</a>'


def _safe_stem(raw_name):
    """Return the extension-less base name of *raw_name* without directories.

    Client-supplied names may carry path components (``../../x.pdf`` or a
    Windows-style ``C:\\docs\\x.pdf``); only the final component is kept so the
    result can never escape ``UPLOAD_FOLDER``. Falls back to ``"document"``
    when nothing usable remains.
    """
    base = os.path.basename((raw_name or '').replace('\\', '/'))
    stem = os.path.splitext(base)[0]
    # Leading dots would create hidden files (and "." / ".." style names).
    return stem.lstrip('.') or 'document'


def _stored_name(raw_name):
    """Build the on-disk name ``<stem>_<doc_id>_<YYYYMMDD>.pdf``."""
    timestamp = datetime.now().strftime('%Y%m%d')
    doc_id = uuid.uuid4().hex[:8]
    return f"{_safe_stem(raw_name)}_{doc_id}_{timestamp}.pdf"


@app.route('/')
def index():
    """Render the upload page with the sorted list of stored PDF names."""
    files = sorted(f for f in os.listdir(UPLOAD_FOLDER) if f.endswith('.pdf'))
    return render_template('upload.html', files=files)


@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a multipart upload in field ``file`` (a PDF or a ZIP of PDFs).

    Returns:
        ``(body, 200)`` from the matching save helper, or ``(message, 400)``
        when no file was sent or the extension is neither .pdf nor .zip.
    """
    uploaded_file = request.files.get('file')
    if not uploaded_file:
        return "No file uploaded", 400

    # Compare case-insensitively so "REPORT.PDF" is treated like "report.pdf",
    # matching how ZIP members are filtered.
    lowered = (uploaded_file.filename or '').lower()

    if lowered.endswith('.pdf'):
        return save_pdf_file(uploaded_file)

    if lowered.endswith('.zip'):
        return extract_and_save_pdfs_from_zip(uploaded_file)

    return "Invalid file type", 400


def save_pdf_file(file):
    """Store one uploaded PDF under a generated name.

    Args:
        file: the uploaded file object; its ``filename`` and ``save(path)``
            are used.

    Returns:
        ``(html_message, 200)`` naming the stored file.
    """
    filename = _stored_name(file.filename)
    file.save(os.path.join(UPLOAD_FOLDER, filename))
    # The name originates from the client, so escape it before echoing it
    # back inside an HTML response.
    return f"PDF uploaded as {html.escape(filename)}{_BACK_LINK}", 200


def extract_and_save_pdfs_from_zip(file):
    """Store every PDF contained in an uploaded ZIP archive.

    Directories, non-PDF entries, hidden files, ``__MACOSX`` metadata and
    repeated base names are skipped.

    Args:
        file: a file-like object readable by ``zipfile.ZipFile``.

    Returns:
        ``(html_message, 200)`` with the number of PDFs stored, or
        ``("Invalid ZIP file", 400)`` when the archive cannot be opened.
    """
    try:
        with zipfile.ZipFile(file) as zip_ref:
            seen = set()
            count = 0
            for member in zip_ref.infolist():
                # Only the base name is used, so nested or malicious member
                # paths cannot write outside UPLOAD_FOLDER.
                filename = os.path.basename(member.filename)

                if (not filename
                        or filename.startswith('.')
                        or not filename.lower().endswith('.pdf')
                        or '__macosx' in member.filename.lower()
                        or filename in seen):
                    continue

                seen.add(filename)

                filepath = os.path.join(UPLOAD_FOLDER, _stored_name(filename))
                # Stream in chunks instead of loading the member into memory.
                with zip_ref.open(member) as pdf_file, open(filepath, 'wb') as f:
                    shutil.copyfileobj(pdf_file, f)
                count += 1
    except zipfile.BadZipFile:
        return "Invalid ZIP file", 400
    return f"{count} PDF(s) extracted and uploaded{_BACK_LINK}", 200


# The URL variable is required: the view takes ``filename``, and a rule without
# it would make every request fail when Flask calls the view.
@app.route('/document/<filename>', methods=['GET'])
def get_document_by_filename(filename):
    """Send the stored file *filename* as an attachment."""
    return send_from_directory(UPLOAD_FOLDER, filename, as_attachment=True)


@app.route('/documents', methods=['GET'])
def get_documents_in_range():
    """List stored PDFs whose name-embedded date lies in ``[start, end]``.

    Query parameters ``start`` and ``end`` are both required, in YYYYMMDD
    form. Files whose names do not follow ``<stem>_<doc_id>_<YYYYMMDD>.pdf``
    are ignored.

    Returns:
        JSON ``{"documents": [{"doc_id", "date", "file_name"}, ...]}``, or
        ``(message, 400)`` for missing or malformed parameters.
    """
    start = request.args.get('start')
    end = request.args.get('end')
    if not start or not end:
        return "Missing start or end parameter", 400

    try:
        start_date = datetime.strptime(start, '%Y%m%d')
        end_date = datetime.strptime(end, '%Y%m%d')
    except ValueError:
        return "Invalid date format. Use YYYYMMDD.", 400

    results = []
    # Sorted so the response order does not depend on the filesystem.
    for filename in sorted(os.listdir(UPLOAD_FOLDER)):
        if not filename.endswith('.pdf'):
            continue
        # Split from the right: the stem itself may contain underscores.
        parts = filename.removesuffix('.pdf').rsplit('_', 2)
        if len(parts) != 3:
            continue
        _, doc_id, date_str = parts
        try:
            file_date = datetime.strptime(date_str, '%Y%m%d')
        except ValueError:
            continue
        if start_date <= file_date <= end_date:
            results.append({
                "doc_id": doc_id,
                "date": date_str,
                "file_name": filename,
            })

    return jsonify({"documents": results})


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)

# ---------------------------------------------------------------------------
# Patch framing that followed app.py in this span, kept verbatim:
#
# diff --git a/src/dummy_onbase/data/.keep b/src/dummy_onbase/data/.keep
# new file mode 100644
# index 0000000..e69de29
# diff --git a/src/dummy_onbase/templates/upload.html b/src/dummy_onbase/templates/upload.html
# new file mode 100644
# index 0000000..d73ca20
# --- /dev/null
# +++ b/src/dummy_onbase/templates/upload.html
# @@ -0,0 +1,21 @@
# Upload a Text File
#   NOTE(review): presumably the template's page title; it says "Text File"
#   although the app handles PDFs - confirm and correct in the template.
# ---------------------------------------------------------------------------

Upload a .pdf or .zip File

+
+ +

+ +
+ +

Uploaded Files:

+ + +