Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 29 additions & 7 deletions src/_tests/s3_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,40 @@
import os
import streamlit as st
import pandas as pd
import pytest
import time
from pages.Upload import get_unique_filename

# Authenticated (non-anonymous) S3 filesystem handle shared by the tests below;
# the key and secret are read from the Streamlit secrets store.
s3 = s3fs.S3FileSystem(
    anon=False,
    key=st.secrets["S3_KEY"],
    secret=st.secrets["S3_SECRET"],
)
# List the bucket root when the module is imported; the result is discarded.
s3.ls('s3://ocr-database-s3')

# First entry of the current working directory whose name ends in ".csv".
# Raises IndexError when the directory holds no such file.
wanted_files = [
    entry for entry in os.listdir('.') if entry.endswith('.csv')
][0]
def test_get_unique_filename():
    """Check that get_unique_filename renames a file that already exists.

    Writes a small object to ``<bucket>/test/test_file.pdf``, asks
    ``get_unique_filename`` for a name in the same prefix, and asserts that
    the returned name differs from the original while keeping the
    ``test_file_`` stem prefix and the ``.pdf`` extension.
    """
    # Setup test environment
    test_path = f's3://{st.secrets["S3_BUCKET_NAME"]}/test'
    test_filename = 'test_file.pdf'
    # S3 keys always use "/" as separator, so build the key explicitly
    # instead of relying on the host OS path separator.
    test_file_path = f'{test_path}/{test_filename}'

    # Create a test file so that the name is already taken
    with s3.open(test_file_path, 'wb') as f:
        f.write(b'test content')

    try:
        # Test unique filename generation
        unique_name = get_unique_filename(s3, test_path, test_filename)
        assert unique_name != test_filename
        assert unique_name.startswith('test_file_')
        assert unique_name.endswith('.pdf')
    finally:
        # Cleanup runs even when an assertion fails, so a failed run does not
        # leave the fixture object behind in the bucket.
        s3.rm(test_file_path)

# Upload file
s3.put(wanted_files, 's3://ocr-database-s3/' + wanted_files)

# Load csv to dataframe; the context manager closes the S3 file handle once
# pandas has finished reading, instead of leaving it open.
with s3.open('s3://ocr-database-s3/' + wanted_files) as _csv_handle:
    df = pd.read_csv(_csv_handle)
def test_file_upload():
    """Upload a local CSV to the bucket and check it reads back non-empty.

    Picks the first ``.csv`` entry returned by ``os.listdir('.')``, uploads it
    to ``s3://ocr-database-s3/`` under the same name, then loads the uploaded
    object with pandas and asserts the resulting DataFrame is not empty.
    """
    csv_files = [x for x in os.listdir('.') if x.endswith('.csv')]
    # Fail with an explicit message rather than a bare IndexError when the
    # working directory has no CSV to upload.
    assert csv_files, 'no .csv file found in the current directory to upload'
    wanted_files = csv_files[0]
    remote_path = 's3://ocr-database-s3/' + wanted_files

    # Upload file
    s3.put(wanted_files, remote_path)

    # Load csv to dataframe; close the S3 handle as soon as parsing is done.
    with s3.open(remote_path) as remote_csv:
        df = pd.read_csv(remote_csv)
    assert not df.empty
    # NOTE(review): the uploaded object is not removed afterwards - confirm
    # whether that is intentional.
14 changes: 13 additions & 1 deletion src/pages/Upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,17 @@
from datetime import datetime
from PyPDF2 import PdfWriter, PdfReader
import s3fs
import time

def get_unique_filename(s3, path, filename):
    """Generate a unique filename if a conflict is detected.

    ``filename`` is returned unchanged when ``s3.exists`` reports nothing at
    ``path/filename``. Otherwise candidates of the form
    ``<base>_<unix-seconds>_<counter><ext>`` are tried, with the counter
    starting at 1 and increasing by one per attempt, until ``s3.exists``
    reports that a candidate is free.

    Args:
        s3: Object exposing ``exists(path)``; callers in this project pass an
            ``s3fs.S3FileSystem``.
        path: Prefix (directory) in which the name must be unique.
        filename: Desired file name, including its extension.

    Returns:
        The first candidate name for which ``s3.exists`` returned a falsy
        value. Only the name is returned, not the joined path.

    Note:
        The existence check and any later write are separate steps, so this
        does not by itself guard against two concurrent uploads choosing the
        same name.
    """
    # Function-local import keeps this block self-contained. posixpath always
    # joins with "/", which is what S3 keys require; os.path.join would insert
    # backslashes on Windows.
    import posixpath

    base, ext = posixpath.splitext(filename)
    counter = 1
    new_filename = filename
    while s3.exists(posixpath.join(path, new_filename)):
        # The timestamp makes names from different runs distinct; the counter
        # guarantees progress within a single call.
        new_filename = f"{base}_{int(time.time())}_{counter}{ext}"
        counter += 1
    return new_filename

s3 = s3fs.S3FileSystem(
anon=False,
Expand Down Expand Up @@ -56,9 +67,10 @@
output = PdfWriter()
output.add_page(page)

base_filename = get_unique_filename(s3, save_path, uploaded_file.name)
save_page_path = os.path.join(
save_path,
uploaded_file.name.split('.')[0] + '_' + str(i) + '.pdf'
base_filename.split('.')[0] + '_' + str(i) + '.pdf'
)

# with open(save_page_path, 'wb') as f:
Expand Down