Spaces:
Sleeping
Sleeping
rolwinpinto
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -3,10 +3,9 @@ import torch
|
|
3 |
import PyPDF2
|
4 |
from io import BytesIO
|
5 |
from PIL import Image
|
6 |
-
from transformers import BlipProcessor,
|
7 |
import streamlit as st
|
8 |
-
|
9 |
-
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader, Document
|
10 |
from llama_index.embeddings.fastembed import FastEmbedEmbedding
|
11 |
from llama_index.llms.gemini import Gemini
|
12 |
|
@@ -14,44 +13,40 @@ from llama_index.llms.gemini import Gemini
|
|
14 |
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
15 |
Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.5, model_name="models/gemini-pro")
|
16 |
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
18 |
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
19 |
with open(filename, "wb") as f:
|
20 |
f.write(content)
|
21 |
|
22 |
-
_CAPTION_MODEL_NAME = "Salesforce/blip-image-captioning-base"

# Maps a device string to its (processor, model) pair so the pretrained
# weights are loaded at most once per device instead of on every call.
_caption_components = {}


def _load_caption_components(device):
    """Return the cached (processor, model) pair for ``device``."""
    if device not in _caption_components:
        processor = BlipProcessor.from_pretrained(_CAPTION_MODEL_NAME)
        model = BlipForConditionalGeneration.from_pretrained(_CAPTION_MODEL_NAME).to(device)
        _caption_components[device] = (processor, model)
    return _caption_components[device]


def process_image(image_bytes):
    """Generate a caption for an image supplied as raw bytes.

    Args:
        image_bytes: Encoded image file contents; opened with
            ``PIL.Image.open`` through an in-memory buffer.

    Returns:
        A ``(caption_text, image)`` tuple: the decoded caption string and the
        opened PIL image.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    processor, model = _load_caption_components(device)

    image = Image.open(BytesIO(image_bytes))
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        caption = model.generate(**inputs, max_length=50)
        caption_text = processor.decode(caption[0], skip_special_tokens=True)

    return caption_text, image
|
35 |
-
|
36 |
def answer_question_about_image(image, question):
|
37 |
-
|
38 |
-
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
|
39 |
-
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(device)
|
40 |
-
|
41 |
-
inputs = processor(image, question, return_tensors="pt").to(device)
|
42 |
|
43 |
with torch.no_grad():
|
44 |
-
out =
|
45 |
-
answer =
|
46 |
|
47 |
return answer
|
48 |
|
49 |
-
def
|
50 |
pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_file.getvalue()))
|
51 |
text = ""
|
|
|
|
|
52 |
for page in pdf_reader.pages:
|
53 |
text += page.extract_text()
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
def ingest_documents():
|
57 |
reader = SimpleDirectoryReader("./files/")
|
@@ -86,9 +81,9 @@ def generate_summary(index, document_text, query, target_language):
|
|
86 |
# Streamlit app
|
87 |
def main():
|
88 |
st.title("Multimodal and Multilingual Document Analyzer")
|
89 |
-
st.write("Upload a document (PDF,
|
90 |
|
91 |
-
uploaded_file = st.file_uploader("Choose a file", type=["pdf", "
|
92 |
|
93 |
languages = {
|
94 |
'English': 'en',
|
@@ -107,15 +102,15 @@ def main():
|
|
107 |
|
108 |
try:
|
109 |
if file_type == "application/pdf":
|
110 |
-
document_text =
|
111 |
write_to_file(uploaded_file.getvalue(), "./files/uploaded.pdf")
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
elif file_type in ["image/jpeg", "image/png"]:
|
116 |
-
|
117 |
-
document_text =
|
118 |
-
st.image(image,
|
119 |
write_to_file(uploaded_file.getvalue(), "./files/uploaded_image")
|
120 |
else:
|
121 |
st.error("Unsupported file type")
|
@@ -130,7 +125,7 @@ def main():
|
|
130 |
if file_type in ["image/jpeg", "image/png"]:
|
131 |
answer = answer_question_about_image(image, query)
|
132 |
st.write(f"**Direct Answer:** {answer}")
|
133 |
-
summary = generate_summary(index, f"Image
|
134 |
else:
|
135 |
summary = generate_summary(index, document_text, query, target_language)
|
136 |
|
@@ -144,4 +139,4 @@ def main():
|
|
144 |
st.write("Please try uploading the file again or try a different file.")
|
145 |
|
146 |
if __name__ == "__main__":
|
147 |
-
main()
|
|
|
3 |
import PyPDF2
|
4 |
from io import BytesIO
|
5 |
from PIL import Image
|
6 |
+
from transformers import BlipProcessor, BlipForQuestionAnswering
|
7 |
import streamlit as st
|
8 |
+
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
|
|
|
9 |
from llama_index.embeddings.fastembed import FastEmbedEmbedding
|
10 |
from llama_index.llms.gemini import Gemini
|
11 |
|
|
|
13 |
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
14 |
Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.5, model_name="models/gemini-pro")
|
15 |
|
16 |
+
# Global variables to avoid reloading models
|
17 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
+
blip_vqa_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
|
19 |
+
blip_vqa_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(device)
|
20 |
+
|
21 |
+
def write_to_file(content, filename):
    """Write binary ``content`` to ``filename``, creating parent directories.

    Args:
        content: Bytes-like data written to the file in binary mode.
        filename: Destination path. May be a bare file name with no
            directory component.
    """
    parent_dir = os.path.dirname(filename)
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the path actually names one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def answer_question_about_image(image, question):
    """Answer ``question`` about ``image`` using the module-level BLIP VQA model.

    The image/question pair is encoded by ``blip_vqa_processor``, moved to
    ``device``, and passed to ``blip_vqa_model.generate`` with gradient
    tracking disabled. The first generated sequence is decoded to text with
    special tokens skipped and returned.
    """
    encoded = blip_vqa_processor(image, question, return_tensors="pt")
    encoded = encoded.to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        generated_ids = blip_vqa_model.generate(**encoded)
        first_sequence = generated_ids[0]
        answer = blip_vqa_processor.decode(first_sequence, skip_special_tokens=True)

    return answer
|
34 |
|
35 |
+
def _resolve_pdf_object(obj):
    """Return the object behind an indirect reference, or ``obj`` unchanged."""
    getter = getattr(obj, "get_object", None)
    return getter() if callable(getter) else obj


def extract_text_and_images_from_pdf(pdf_file):
    """Extract the text and the raw image streams of every page of a PDF.

    Args:
        pdf_file: Object exposing ``getvalue()`` that returns the PDF bytes.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the concatenation of each
        page's extracted text in page order. ``images`` is a list with the
        ``_data`` payload of every XObject whose ``/Subtype`` is ``/Image``.
    """
    pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_file.getvalue()))
    page_texts = []
    images = []

    for page in pdf_reader.pages:
        # Guard against pages for which no text is returned.
        page_texts.append(page.extract_text() or "")

        # A page may have no /Resources entry, and either dictionary may be
        # stored as an indirect reference, so resolve before calling .get().
        resources = _resolve_pdf_object(page.get('/Resources'))
        if not resources:
            continue
        x_objects = _resolve_pdf_object(resources.get('/XObject'))
        if not x_objects:
            continue

        for name in x_objects:
            x_object = x_objects[name]
            if '/Subtype' in x_object and x_object['/Subtype'] == '/Image':
                # NOTE(review): _data is a private attribute holding the
                # stream payload as stored; presumably only some encodings
                # (e.g. JPEG) can be opened directly as images - confirm.
                images.append(x_object._data)

    return "".join(page_texts), images
|
50 |
|
51 |
def ingest_documents():
|
52 |
reader = SimpleDirectoryReader("./files/")
|
|
|
81 |
# Streamlit app
|
82 |
def main():
|
83 |
st.title("Multimodal and Multilingual Document Analyzer")
|
84 |
+
st.write("Upload a document (PDF, or image), ask questions in your preferred language, and get detailed analysis!")
|
85 |
|
86 |
+
uploaded_file = st.file_uploader("Choose a file", type=["pdf", "jpg", "png"])
|
87 |
|
88 |
languages = {
|
89 |
'English': 'en',
|
|
|
102 |
|
103 |
try:
|
104 |
if file_type == "application/pdf":
|
105 |
+
document_text, images = extract_text_and_images_from_pdf(uploaded_file)
|
106 |
write_to_file(uploaded_file.getvalue(), "./files/uploaded.pdf")
|
107 |
+
for img_data in images:
|
108 |
+
image = Image.open(BytesIO(img_data))
|
109 |
+
st.image(image, use_column_width=True)
|
110 |
elif file_type in ["image/jpeg", "image/png"]:
|
111 |
+
image = Image.open(BytesIO(uploaded_file.getvalue()))
|
112 |
+
document_text = ""
|
113 |
+
st.image(image, use_column_width=True)
|
114 |
write_to_file(uploaded_file.getvalue(), "./files/uploaded_image")
|
115 |
else:
|
116 |
st.error("Unsupported file type")
|
|
|
125 |
if file_type in ["image/jpeg", "image/png"]:
|
126 |
answer = answer_question_about_image(image, query)
|
127 |
st.write(f"**Direct Answer:** {answer}")
|
128 |
+
summary = generate_summary(index, f"Image query: {query}\nAnswer: {answer}", query, target_language)
|
129 |
else:
|
130 |
summary = generate_summary(index, document_text, query, target_language)
|
131 |
|
|
|
139 |
st.write("Please try uploading the file again or try a different file.")
|
140 |
|
141 |
if __name__ == "__main__":
|
142 |
+
main()
|