mkoot007 committed on
Commit
37f9b04
·
1 Parent(s): 295f37f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -21
app.py CHANGED
@@ -1,15 +1,21 @@
1
  import gradio as gr
2
  import re
3
  from docx import Document
4
- from PyPDF2 import PdfReader # Use PdfReader from PyPDF2
5
 
6
  # Function to extract text from a PDF file
7
  def extract_text_from_pdf(pdf_file):
8
  text = ""
9
- pdf = PdfReader(pdf_file)
10
- for page in pdf.pages:
11
- text += page.extract_text()
12
- return text
 
 
 
 
 
 
13
 
14
  # Function to extract text from a DOCX file
15
  def extract_text_from_docx(docx_file):
@@ -35,23 +41,10 @@ def extract_info_from_resume(resume_path):
35
  email = re.search(email_pattern, text)
36
  phone = re.search(phone_pattern, text)
37
 
38
- if name:
39
- name = name.group()
40
- else:
41
- name = "Name not found"
42
- if email:
43
- email = email.group()
44
- else:
45
- email = "Email not found"
46
- if phone:
47
- phone = phone.group()
48
- else:
49
- phone = "Phone number not found"
50
-
51
  extracted_info = {
52
- "Name": name,
53
- "Email": email,
54
- "Phone": phone,
55
  }
56
 
57
  return extracted_info
 
1
  import gradio as gr
2
  import re
3
  from docx import Document
4
+ from PyPDF2 import PdfFileReader
5
 
6
  # Function to extract text from a PDF file
7
  def extract_text_from_pdf(pdf_file):
8
  text = ""
9
+ try:
10
+ pdf = PdfFileReader(pdf_file)
11
+ if pdf.numPages == 0:
12
+ raise ValueError("The PDF file is empty.")
13
+ for page_num in range(pdf.getNumPages()):
14
+ page = pdf.getPage(page_num)
15
+ text += page.extractText()
16
+ return text
17
+ except Exception as e:
18
+ raise ValueError("Error reading the PDF file: " + str(e)
19
 
20
  # Function to extract text from a DOCX file
21
  def extract_text_from_docx(docx_file):
 
41
  email = re.search(email_pattern, text)
42
  phone = re.search(phone_pattern, text)
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  extracted_info = {
45
+ "Name": name.group() if name else "Name not found",
46
+ "Email": email.group() if email else "Email not found",
47
+ "Phone": phone.group() if phone else "Phone number not found",
48
  }
49
 
50
  return extracted_info