heisenberg3376 committed on
Commit
f1e2997
1 Parent(s): 8ba6fe8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -4,6 +4,7 @@ import pdfplumber
4
  import transformers
5
  from transformers import pipeline
6
  from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 
7
 
8
  tokenizer = AutoTokenizer.from_pretrained("aware-ai/bart-squadv2")
9
  model = AutoModelForQuestionAnswering.from_pretrained("aware-ai/bart-squadv2")
@@ -43,12 +44,15 @@ def answer_question(question, pdf_text):
43
 
44
  import pdfplumber
45
 
 
 
46
  def extract_text_from_pdf(pdf_path):
47
  text = ""
48
- with pdfplumber.open(pdf_path) as pdf:
49
- for page in pdf.pages:
50
- text += page.extract_text()
51
- return text
 
52
 
53
 
54
  def chat_with_pdf(question, pdf):
 
4
  import transformers
5
  from transformers import pipeline
6
  from transformers import AutoTokenizer, AutoModelForQuestionAnswering
7
+ import fitz
8
 
9
  tokenizer = AutoTokenizer.from_pretrained("aware-ai/bart-squadv2")
10
  model = AutoModelForQuestionAnswering.from_pretrained("aware-ai/bart-squadv2")
 
44
 
45
  import pdfplumber
46
 
47
+ import fitz # PyMuPDF
48
+
49
  def extract_text_from_pdf(pdf_path):
50
  text = ""
51
+ pdf = fitz.open(pdf_path)
52
+ for page_num in range(len(pdf)):
53
+ page = pdf[page_num]
54
+ text += page.get_text()
55
+ return text
56
 
57
 
58
  def chat_with_pdf(question, pdf):