From d09ed6826f77145f4415ba65dc1154cdb9cf87d3 Mon Sep 17 00:00:00 2001 From: "Rajeshkumar.P" Date: Sun, 5 Feb 2017 22:17:39 +0530 Subject: [PATCH 1/2] Updated to Code to handle Image update from a Folder --- google-ocr.py | 44 ++++++++++++++++++++++++++++------- googleocrRR.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 googleocrRR.py diff --git a/google-ocr.py b/google-ocr.py index 796478a..d05a0e6 100644 --- a/google-ocr.py +++ b/google-ocr.py @@ -1,10 +1,36 @@ -import glob +import argparse import os +import glob + +parser = argparse.ArgumentParser(description="Convert Images in Local Folder into Text Files in Local Folder by uploading in Google Drive Folder") +parser.add_argument("-ip","--LocalImageFolderPath", help='Local Images Folder Path',default=os.getcwd()) +parser.add_argument("-tp","--LocalTextFolderPath", help="Local Text Files FolderPath",default=os.getcwd()) +parser.add_argument("-gf","--GoogleDriveFolderName", help="Google Drive Folder Name",default="OCRFolder") +parser.add_argument("-o","--Output", help="Local Output Text File Name",default="ocr-result") + +args = parser.parse_args() + +locImagePath = args.LocalImageFolderPath +locTextPath = args.LocalTextFolderPath +gdFolderName = args.GoogleDriveFolderName +resultFileName = args.Output + +filetypes = ('*.jpg','*.jpeg','*.gif','*.png') + +# List of Image Files to be OCRed +files=[] +for afiletype in filetypes: + files.extend(glob.glob(os.path.join(locImagePath,afiletype))) + print glob.glob(os.path.join(locImagePath,afiletype)) -files = [] -for filename in glob.glob('*.jpg'): - files.append(filename) +print "***************" + + +command = 'gdmkdir.py '+ gdFolderName + " > FolderCreation.log" + +print "running " + command +os.system(command) for image in sorted(files): print "uploading " + image @@ -17,14 +43,16 @@ for line in resultfile: if "id:" in line: fileid = line.split(":")[1].strip() - filename = image.split(".")[0] + ".txt" - get_command = "gdget.py -f txt -s " + filename + " " + fileid + filename = image.split(".")[0] + get_command = "gdget.py -f odt -s " + filename + ".odt " + fileid print "running "+ get_command os.system(get_command) + get_command = "gdget.py -f txt -s " + filename + ".txt " + fileid + os.system(get_command) + +print "Merging all text files into ocr-result.odt" -print "Merging all text files into ocr-result.txt" - files = glob.glob('*.txt' ) with open('ocr-result.txt', 'w' ) as result: diff --git a/googleocrRR.py b/googleocrRR.py new file mode 100644 index 0000000..d05a0e6 --- /dev/null +++ b/googleocrRR.py @@ -0,0 +1,63 @@ +import argparse +import os +import glob + +parser = argparse.ArgumentParser(description="Convert Images in Local Folder into Text Files in Local Folder by uploading in Google Drive Folder") +parser.add_argument("-ip","--LocalImageFolderPath", help='Local Images Folder Path',default=os.getcwd()) +parser.add_argument("-tp","--LocalTextFolderPath", help="Local Text Files FolderPath",default=os.getcwd()) +parser.add_argument("-gf","--GoogleDriveFolderName", help="Google Drive Folder Name",default="OCRFolder") +parser.add_argument("-o","--Output", help="Local Output Text File Name",default="ocr-result") + +args = parser.parse_args() + +locImagePath = args.LocalImageFolderPath +locTextPath = args.LocalTextFolderPath +gdFolderName = args.GoogleDriveFolderName +resultFileName = args.Output + +filetypes = ('*.jpg','*.jpeg','*.gif','*.png') + +# List of Image Files to be OCRed +files=[] + +for afiletype in filetypes: + files.extend(glob.glob(os.path.join(locImagePath,afiletype))) + print glob.glob(os.path.join(locImagePath,afiletype)) + +print "***************" + + +command = 'gdmkdir.py '+ gdFolderName + " > FolderCreation.log" + +print "running " + command +os.system(command) + +for image in sorted(files): + print "uploading " + image + command = "gdput.py -t ocr " + image + " > result.log" + print "running " + command + os.system(command) + + resultfile = open("result.log","r").readlines() + + for line in resultfile: + if "id:" in line: + fileid = line.split(":")[1].strip() + filename = image.split(".")[0] + get_command = "gdget.py -f odt -s " + filename + ".odt " + fileid + print "running "+ get_command + os.system(get_command) + get_command = "gdget.py -f txt -s " + filename + ".txt " + fileid + os.system(get_command) + +print "Merging all text files into ocr-result.odt" + + +files = glob.glob('*.txt' ) + +with open('ocr-result.txt', 'w' ) as result: + for textfile in files: + for line in open( textfile, 'r' ): + result.write( line ) + +print "Done" From 4b90de845ad1991ac13e2913acc92af1be8ad034 Mon Sep 17 00:00:00 2001 From: rajeshkumargp Date: Sun, 5 Feb 2017 22:42:30 +0530 Subject: [PATCH 2/2] Delete googleocrRR.py Deleted Duplicate Files --- googleocrRR.py | 63 -------------------------------------------------- 1 file changed, 63 deletions(-) delete mode 100644 googleocrRR.py diff --git a/googleocrRR.py b/googleocrRR.py deleted file mode 100644 index d05a0e6..0000000 --- a/googleocrRR.py +++ /dev/null @@ -1,63 +0,0 @@ -import argparse -import os -import glob - -parser = argparse.ArgumentParser(description="Convert Images in Local Folder into Text Files in Local Folder by uploading in Google Drive Folder") -parser.add_argument("-ip","--LocalImageFolderPath", help='Local Images Folder Path',default=os.getcwd()) -parser.add_argument("-tp","--LocalTextFolderPath", help="Local Text Files FolderPath",default=os.getcwd()) -parser.add_argument("-gf","--GoogleDriveFolderName", help="Google Drive Folder Name",default="OCRFolder") -parser.add_argument("-o","--Output", help="Local Output Text File Name",default="ocr-result") - -args = parser.parse_args() - -locImagePath = args.LocalImageFolderPath -locTextPath = args.LocalTextFolderPath -gdFolderName = args.GoogleDriveFolderName -resultFileName = args.Output - -filetypes = ('*.jpg','*.jpeg','*.gif','*.png') - -# List of Image Files to be OCRed -files=[] - -for afiletype in filetypes: - files.extend(glob.glob(os.path.join(locImagePath,afiletype))) - print glob.glob(os.path.join(locImagePath,afiletype)) - -print "***************" - - -command = 'gdmkdir.py '+ gdFolderName + " > FolderCreation.log" - -print "running " + command -os.system(command) - -for image in sorted(files): - print "uploading " + image - command = "gdput.py -t ocr " + image + " > result.log" - print "running " + command - os.system(command) - - resultfile = open("result.log","r").readlines() - - for line in resultfile: - if "id:" in line: - fileid = line.split(":")[1].strip() - filename = image.split(".")[0] - get_command = "gdget.py -f odt -s " + filename + ".odt " + fileid - print "running "+ get_command - os.system(get_command) - get_command = "gdget.py -f txt -s " + filename + ".txt " + fileid - os.system(get_command) - -print "Merging all text files into ocr-result.odt" - - -files = glob.glob('*.txt' ) - -with open('ocr-result.txt', 'w' ) as result: - for textfile in files: - for line in open( textfile, 'r' ): - result.write( line ) - -print "Done"