diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/.name b/.idea/.name
new file mode 100644
index 0000000..aa15e3e
--- /dev/null
+++ b/.idea/.name
@@ -0,0 +1 @@
+app.py
\ No newline at end of file
diff --git a/.idea/Env1.iml b/.idea/Env1.iml
new file mode 100644
index 0000000..e110c18
--- /dev/null
+++ b/.idea/Env1.iml
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..054a04d
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..e2445a2
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..cd30fdd
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/app.py b/app.py
index 59194f7..9b731ae 100644
--- a/app.py
+++ b/app.py
@@ -15,13 +15,11 @@ def editor():
return render_template('editor.html')
-
@app.route('/submit', methods=['Post'])
def submit_data():
data = request.json
words = re.findall(r'\b\w+\b', data.lower())
word_counts = Counter(words)
- # print(f'Word count: {word_counts}')
email = extracted_email(data)
phoneNumber = extracted_phoneNumber(data)
print(f'Data: {data}, Email: {email}, Phone: {phoneNumber}')
@@ -44,5 +42,6 @@ def extracted_phoneNumber(data):
+main
if __name__ == '__main__':
app.run(debug=True)
\ No newline at end of file
diff --git a/pdfReader.py b/pdfReader.py
new file mode 100644
index 0000000..f2b40c8
--- /dev/null
+++ b/pdfReader.py
@@ -0,0 +1,97 @@
+from pypdf import PdfReader
+import re
+
+
+def cleaner(list_array, item):
+    """Return a new list with every element equal to ``item`` removed."""
+    return [entry for entry in list_array if entry != item]
+
+
+def _marker_index(tokens, marker):
+    """Return the index of the first token equal to ``marker``.
+
+    When the marker is absent, 'Value does not exist' is printed and
+    ``len(tokens)`` is returned, so the section being bounded runs to the
+    end of the text instead of leaving the boundary undefined.
+    """
+    try:
+        return tokens.index(marker)
+    except ValueError:
+        print('Value does not exist')
+        return len(tokens)
+
+
+def _collect_section(tokens, is_heading, end):
+    """Return the tokens from every heading position up to ``end``.
+
+    ``is_heading`` is called with each token index. A heading located at
+    or after ``end`` contributes nothing because its slice is empty.
+    """
+    section = []
+    for position in range(len(tokens)):
+        if is_heading(position):
+            section.extend(tokens[position:end])
+    return section
+
+
+def pefReader(data):
+    """Print the sections found in the first two pages of a CV PDF.
+
+    ``data`` is passed unchanged to ``pypdf.PdfReader``. The text is
+    lower-cased and split on spaces; the first 15 tokens are reported as
+    personal data, and the summary, education, work and publications
+    sections are located by their heading tokens. Nothing is returned.
+    """
+    reader = PdfReader(data)
+    # Read at most two pages so a one-page CV does not raise IndexError.
+    page_count = min(len(reader.pages), 2)
+    content = ''.join(reader.pages[n].extract_text() for n in range(page_count))
+
+    # Collapse runs of spaces, split on single spaces and drop empty tokens.
+    # Newlines stay attached to tokens, so a heading that starts a line shows
+    # up as e.g. '\neducation:' and is compared after strip() below.
+    tokens = cleaner(re.sub(r' {2,}', ' ', content.lower()).split(' '), '')
+
+    end_of_summary = _marker_index(tokens, '\neducation:')
+    end_of_edu = _marker_index(tokens, '\nwork')
+    end_of_work = _marker_index(tokens, '\nleadership')
+    # NOTE(review): the old end marker for publications was '', which can
+    # never be found once empty tokens are dropped. The section is assumed
+    # to run to the end of the text -- confirm against real CVs.
+    end_of_publications = len(tokens)
+
+    def named(position, *names):
+        return tokens[position].strip() in names
+
+    def work_heading(position):
+        # Bounds check: "work" may be the very last token.
+        return (named(position, 'work') and position + 1 < len(tokens)
+                and named(position + 1, 'experience:'))
+
+    education_array = _collect_section(
+        tokens, lambda p: named(p, 'education:'), end_of_edu)
+    work_array = _collect_section(tokens, work_heading, end_of_work)
+    summary_array = _collect_section(
+        tokens, lambda p: named(p, 'objective', 'summary'), end_of_summary)
+    # Publications used to be appended to the summary through stale loop
+    # variables; they now get their own list and report line.
+    publications_array = _collect_section(
+        tokens, lambda p: named(p, 'publications', 'projects'),
+        end_of_publications)
+    personal_data = tokens[:15]
+
+    # Report each section that came back empty.
+    for found, message in (
+            (education_array, 'Educational experience not found'),
+            (work_array, 'Work experience not found'),
+            (summary_array, 'Professional summary not found'),
+            (publications_array, 'Publications not found')):
+        if not found:
+            print(message)
+
+    print(f'\n Personal Data: {personal_data} \n\n Candidate Objective: {summary_array} \n\n '
+          f'Educational experince : {education_array} \n\n Work Experience : {work_array} \n\n '
+          f'Publications : {publications_array}')
+
+
+pefReader('MMujtaba-CV.pdf')