# Hugging Face Space page header captured with the file (status: Sleeping).
import os | |
import torch | |
import PyPDF2 | |
from io import BytesIO | |
from PIL import Image | |
from transformers import BlipProcessor, BlipForQuestionAnswering | |
import streamlit as st | |
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader | |
from llama_index.embeddings.fastembed import FastEmbedEmbedding | |
from llama_index.llms.gemini import Gemini | |
# Configure FastEmbed and Google Gemini.
#
# Streamlit re-executes this script on every user interaction, so plain
# module-level construction would rebuild the models on each rerun. The heavy
# objects are therefore created inside st.cache_resource loaders, which build
# them once per server process and hand back the same instances afterwards.
@st.cache_resource
def _load_embed_model():
    """Create the FastEmbed embedding model used for indexing and retrieval."""
    return FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")


@st.cache_resource
def _load_blip_vqa(target_device):
    """Load the BLIP VQA processor and model, moving the model to *target_device*.

    Returns:
        A ``(processor, model)`` tuple.
    """
    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
    return processor, model.to(target_device)


Settings.embed_model = _load_embed_model()
Settings.llm = Gemini(
    api_key=os.getenv("GOOGLE_API_KEY"),
    temperature=0.5,
    model_name="models/gemini-pro",
)

# Shared by every function below that runs the BLIP model.
device = "cuda" if torch.cuda.is_available() else "cpu"
blip_vqa_processor, blip_vqa_model = _load_blip_vqa(device)
def write_to_file(content, filename):
    """Write binary *content* to *filename*, creating its parent directory.

    Args:
        content: Bytes-like data to write.
        filename: Destination path. It may be a bare file name with no
            directory component, in which case nothing is created and the
            file is written relative to the current working directory.
    """
    parent_dir = os.path.dirname(filename)
    # os.makedirs("") raises FileNotFoundError, so only create a parent
    # directory when the path actually names one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)
def answer_question_about_image(image, question):
    """Answer *question* about *image* with the module-level BLIP VQA model.

    The image and question are encoded by ``blip_vqa_processor``, moved to
    ``device``, and passed to ``blip_vqa_model.generate`` with gradient
    tracking disabled. The first generated sequence is decoded with special
    tokens removed and returned.
    """
    encoded = blip_vqa_processor(image, question, return_tensors="pt").to(device)
    with torch.no_grad():
        generated_ids = blip_vqa_model.generate(**encoded)
    return blip_vqa_processor.decode(generated_ids[0], skip_special_tokens=True)
def extract_text_and_images_from_pdf(pdf_file):
    """Extract the text and the embedded image streams from an uploaded PDF.

    Args:
        pdf_file: Object exposing ``getvalue()`` that returns the PDF bytes
            (the caller passes the Streamlit upload).

    Returns:
        A ``(text, images)`` tuple: ``text`` is the concatenation of every
        page's extracted text, ``images`` is a list with one entry per
        ``/Image`` XObject found in the pages' resources.
    """
    pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_file.getvalue()))
    text_parts = []
    images = []
    for page in pdf_reader.pages:
        # Guard against a falsy result so a text-less page cannot break the join.
        text_parts.append(page.extract_text() or "")

        # A page may carry no /Resources or no /XObject entry at all, and
        # plain dict.get() can hand back an unresolved indirect reference,
        # so resolve each level explicitly before descending.
        resources = page.get("/Resources")
        if resources is None:
            continue
        x_objects = resources.get_object().get("/XObject")
        if x_objects is None:
            continue
        x_objects = x_objects.get_object()

        for name in x_objects:
            x_object = x_objects[name]
            if x_object.get("/Subtype") == "/Image":
                # NOTE(review): `_data` is a private attribute; presumably it
                # holds the stream bytes as stored in the PDF. Confirm these
                # are decodable image files for every filter in use.
                images.append(x_object._data)
    return "".join(text_parts), images
def ingest_documents():
    """Read ``./files/`` with ``SimpleDirectoryReader`` and return what it loads."""
    return SimpleDirectoryReader("./files/").load_data()
def load_data(documents):
    """Build and return a ``VectorStoreIndex`` from *documents*."""
    return VectorStoreIndex.from_documents(documents)
def generate_summary(index, document_text, query, target_language):
    """Ask the index's query engine for a translated analysis of the document.

    Args:
        index: Object exposing ``as_query_engine()``; the returned engine's
            ``query(prompt)`` result must expose a ``response`` attribute.
        document_text: Text of the document, embedded verbatim in the prompt.
        query: The user's question, in any language.
        target_language: Language the answer should be written in.

    Returns:
        The engine's answer with surrounding whitespace removed, or an empty
        string when the engine produced no text.
    """
    prompt = (
        "\n"
        "You are a multilingual analyst and translator. Translate the query into English, "
        "analyze the document based on the translated query,\n"
        f"and then translate the response back into {target_language}.\n"
        f"Query: {query}\n"
        f"Document: {document_text}\n"
        "Please cover the following aspects:\n"
        "1. Key insights related to the query\n"
        "2. Explanation based on the content of the document\n"
        "3. Any relevant comparisons or conclusions\n"
        f"Provide a clear, concise, and professional response in {target_language}.\n"
    )
    response = index.as_query_engine().query(prompt)
    # The response text can be None (no answer produced); calling .strip()
    # on it directly would raise AttributeError.
    answer_text = response.response
    return answer_text.strip() if answer_text else ""
# Streamlit app

# Paths this app writes uploads to. Both live in the directory that
# ingest_documents() reads, so only the current upload may remain there.
_PDF_UPLOAD_PATH = "./files/uploaded.pdf"
_IMAGE_UPLOAD_PATH = "./files/uploaded_image"


def _store_upload(content, filename):
    """Save the current upload and drop the app's previous upload of the other kind."""
    for stale_path in (_PDF_UPLOAD_PATH, _IMAGE_UPLOAD_PATH):
        if stale_path == filename:
            continue
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            # Nothing was uploaded under that name before; nothing to clean.
            pass
    write_to_file(content, filename)


def _show_pdf_images(images):
    """Display each extracted PDF image, skipping any that cannot be decoded."""
    for img_data in images:
        try:
            # Decoding can fail in Image.open or lazily inside st.image, so
            # both calls are covered. PIL's UnidentifiedImageError is an
            # OSError subclass.
            st.image(Image.open(BytesIO(img_data)), use_column_width=True)
        except OSError:
            # One undecodable embedded image must not abort the analysis.
            continue


def main():
    """Run the Streamlit UI: upload a PDF or image, ask a question, show the analysis.

    PDFs are parsed for text and embedded images; images are answered
    directly by the BLIP VQA helper first. In both cases the upload is saved
    under ``./files`` and a summary is generated from a vector index built
    over that directory. The index is built only once a non-empty query is
    submitted with the "Ask" button. Any exception raised while handling the
    upload is reported in the page instead of crashing the app.
    """
    st.title("Multimodal and Multilingual Document Analyzer")
    st.write("Upload a document (PDF, or image), ask questions in your preferred language, and get detailed analysis!")
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "jpg", "png"])
    languages = {
        'English': 'en',
        'Hindi': 'hi',
        'Kannada': 'kn',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
    }
    selected_language = st.selectbox("Select your preferred language", list(languages.keys()))
    target_language = languages[selected_language]

    if uploaded_file is None:
        return

    file_type = uploaded_file.type
    st.write(f"Analyzing {file_type} file...")
    is_image = file_type in ("image/jpeg", "image/png")
    try:
        if file_type == "application/pdf":
            document_text, images = extract_text_and_images_from_pdf(uploaded_file)
            _store_upload(uploaded_file.getvalue(), _PDF_UPLOAD_PATH)
            _show_pdf_images(images)
        elif is_image:
            image = Image.open(BytesIO(uploaded_file.getvalue()))
            document_text = ""
            st.image(image, use_column_width=True)
            _store_upload(uploaded_file.getvalue(), _IMAGE_UPLOAD_PATH)
        else:
            st.error("Unsupported file type")
            return

        query = st.text_input(f"Enter your query in {selected_language}", "")
        if st.button("Ask") and query:
            # Build the index only when a question is actually asked; doing
            # it unconditionally would re-embed the upload on every rerun.
            index = load_data(ingest_documents())
            if is_image:
                answer = answer_question_about_image(image, query)
                st.write(f"**Direct Answer:** {answer}")
                summary = generate_summary(index, f"Image query: {query}\nAnswer: {answer}", query, target_language)
            else:
                summary = generate_summary(index, document_text, query, target_language)
            st.write("## Analysis")
            st.write(f"**Query:** {query}")
            st.write("## Summary")
            st.write(summary)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user.
        st.error(f"An error occurred: {str(e)}")
        st.write("Please try uploading the file again or try a different file.")


if __name__ == "__main__":
    main()