Spaces:
Running
Running
PanigrahiNirma
committed on
Commit
•
9a5a690
1
Parent(s):
8a789bb
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
3 |
from transformers.pipelines import pipeline
|
4 |
-
import
|
5 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
6 |
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
import numpy as np
|
@@ -13,12 +13,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
13 |
qa_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
|
14 |
|
15 |
def read_pdf(file):
|
16 |
-
|
17 |
-
text = ""
|
18 |
-
for page_num in range(reader.numPages):
|
19 |
-
page = reader.getPage(page_num)
|
20 |
-
text += page.extract_text()
|
21 |
-
return text
|
22 |
|
23 |
def retrieve_relevant_text(question, context, top_n=5):
|
24 |
vectorizer = TfidfVectorizer().fit_transform([question] + context)
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
3 |
from transformers.pipelines import pipeline
|
4 |
+
from pdfminer.high_level import extract_text
|
5 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
6 |
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
import numpy as np
|
|
|
13 |
qa_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
|
14 |
|
15 |
def read_pdf(file):
|
16 |
+
return extract_text(file)
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
def retrieve_relevant_text(question, context, top_n=5):
|
19 |
vectorizer = TfidfVectorizer().fit_transform([question] + context)
|