Lingualizer / app.py
rolwinpinto's picture
Update app.py
fb64d37 verified
import os
import torch
import PyPDF2
from io import BytesIO
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering
import streamlit as st
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.llms.gemini import Gemini
# Configure the llama-index defaults: FastEmbed for embeddings, Google Gemini as the LLM.
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.5, model_name="models/gemini-pro")

# Streamlit re-executes this script from the top on every widget interaction,
# so plain module-level assignments alone do not keep the BLIP weights from
# being loaded again. st.cache_resource keeps a single loaded copy around.
device = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def _load_blip_vqa(target_device):
    """Load the BLIP VQA processor and model once, with the model on *target_device*.

    Args:
        target_device: Torch device string ("cuda" or "cpu") the model is moved to.

    Returns:
        A ``(processor, model)`` tuple.
    """
    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(target_device)
    return processor, model


blip_vqa_processor, blip_vqa_model = _load_blip_vqa(device)
def write_to_file(content, filename):
    """Write *content* to *filename* in binary mode, creating parent directories.

    Args:
        content: Bytes-like data written verbatim.
        filename: Destination path; an existing file is overwritten.
    """
    parent_dir = os.path.dirname(filename)
    # dirname() is "" for a bare file name and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when the path has one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)
def answer_question_about_image(image, question):
    """Answer *question* about *image* using the module-level BLIP VQA model.

    Args:
        image: Image handed to the BLIP processor (``main`` passes a PIL image).
        question: Question text about the image.

    Returns:
        The decoded answer string with special tokens removed.
    """
    encoded = blip_vqa_processor(image, question, return_tensors="pt")
    encoded = encoded.to(device)
    # Generation only, so gradient tracking is switched off.
    with torch.no_grad():
        generated_ids = blip_vqa_model.generate(**encoded)
    first_sequence = generated_ids[0]
    return blip_vqa_processor.decode(first_sequence, skip_special_tokens=True)
def _resolve_pdf_object(obj):
    """Return the object *obj* points to when it exposes ``get_object``; else *obj*."""
    resolver = getattr(obj, "get_object", None)
    return resolver() if callable(resolver) else obj


def extract_text_and_images_from_pdf(pdf_file):
    """Extract the text and the raw embedded-image streams of an uploaded PDF.

    Args:
        pdf_file: Object with a ``getvalue()`` method returning the PDF bytes.

    Returns:
        A ``(text, images)`` tuple: the concatenated text of all pages and a
        list with the ``_data`` payload of every ``/Image`` XObject found.
        NOTE(review): ``_data`` is the stream as stored in the PDF, so entries
        are presumably only directly openable as images for some encodings
        (e.g. JPEG) - confirm against the PyPDF2 version in use.
    """
    pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_file.getvalue()))
    text_parts = []
    images = []
    for page in pdf_reader.pages:
        # extract_text() may yield nothing for a page; never concatenate None.
        text_parts.append(page.extract_text() or "")

        # dict.get() can hand back an unresolved indirect reference, and a
        # page may have no /Resources or no /XObject entry at all.
        resources = _resolve_pdf_object(page.get('/Resources'))
        if not resources:
            continue
        x_objects = _resolve_pdf_object(resources.get('/XObject'))
        if not x_objects:
            continue

        for name in x_objects:
            x_object = x_objects[name]
            if x_object.get('/Subtype') == '/Image':
                images.append(x_object._data)
    return "".join(text_parts), images
def ingest_documents():
    """Load everything under ``./files/`` with SimpleDirectoryReader and return it."""
    return SimpleDirectoryReader("./files/").load_data()
def load_data(documents):
    """Build and return a VectorStoreIndex from *documents*."""
    return VectorStoreIndex.from_documents(documents)
def generate_summary(index, document_text, query, target_language):
    """Query *index* with a translate-analyze-translate prompt and return the answer.

    Args:
        index: Object providing ``as_query_engine()``.
        document_text: Document text embedded verbatim in the prompt.
        query: The user's question, in any language.
        target_language: Language the prompt asks the response to be written in.

    Returns:
        The ``response`` text of the query result, stripped of surrounding whitespace.
    """
    prompt = f"""
You are a multilingual analyst and translator. Translate the query into English, analyze the document based on the translated query,
and then translate the response back into {target_language}.
Query: {query}
Document: {document_text}
Please cover the following aspects:
1. Key insights related to the query
2. Explanation based on the content of the document
3. Any relevant comparisons or conclusions
Provide a clear, concise, and professional response in {target_language}.
"""
    engine = index.as_query_engine()
    result = engine.query(prompt)
    answer_text = result.response
    return answer_text.strip()
# Streamlit app
def _remove_if_present(path):
    """Delete *path*; do nothing when it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def main():
    """Render the Streamlit UI: upload a PDF or image, ask a question, show the analysis.

    The upload is saved under ``./files/``. When "Ask" is pressed with a
    non-empty query, that directory is indexed and the summary from
    ``generate_summary`` is displayed; for images the direct BLIP answer is
    shown as well. Any exception raised while processing is reported in the UI.
    """
    st.title("Multimodal and Multilingual Document Analyzer")
    st.write("Upload a document (PDF, or image), ask questions in your preferred language, and get detailed analysis!")
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "jpg", "png"])
    languages = {
        'English': 'en',
        'Hindi': 'hi',
        'Kannada': 'kn',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
    }
    selected_language = st.selectbox("Select your preferred language", list(languages.keys()))
    target_language = languages[selected_language]

    if uploaded_file is not None:
        file_type = uploaded_file.type
        st.write(f"Analyzing {file_type} file...")
        try:
            if file_type == "application/pdf":
                document_text, images = extract_text_and_images_from_pdf(uploaded_file)
                # ingest_documents() indexes everything in ./files/, so drop a
                # previously uploaded image to keep it out of this PDF's index.
                _remove_if_present("./files/uploaded_image")
                write_to_file(uploaded_file.getvalue(), "./files/uploaded.pdf")
                for img_data in images:
                    # Embedded streams are not always a format PIL can open;
                    # skip those instead of aborting the whole analysis.
                    try:
                        image = Image.open(BytesIO(img_data))
                        image.load()
                    except OSError:
                        continue
                    st.image(image, use_column_width=True)
            elif file_type in ["image/jpeg", "image/png"]:
                image = Image.open(BytesIO(uploaded_file.getvalue()))
                document_text = ""
                st.image(image, use_column_width=True)
                # Same reasoning as above: keep an earlier PDF out of the index.
                _remove_if_present("./files/uploaded.pdf")
                write_to_file(uploaded_file.getvalue(), "./files/uploaded_image")
            else:
                st.error("Unsupported file type")
                return

            query = st.text_input(f"Enter your query in {selected_language}", "")
            if st.button("Ask") and query:
                # Build the index only when an answer is requested; the script
                # reruns on every interaction and indexing is the costly step.
                documents = ingest_documents()
                index = load_data(documents)
                if file_type in ["image/jpeg", "image/png"]:
                    answer = answer_question_about_image(image, query)
                    st.write(f"**Direct Answer:** {answer}")
                    summary = generate_summary(index, f"Image query: {query}\nAnswer: {answer}", query, target_language)
                else:
                    summary = generate_summary(index, document_text, query, target_language)
                st.write("## Analysis")
                st.write(f"**Query:** {query}")
                st.write("## Summary")
                st.write(summary)
        except Exception as e:
            # Top-level UI boundary: surface the failure to the user.
            st.error(f"An error occurred: {str(e)}")
            st.write("Please try uploading the file again or try a different file.")


if __name__ == "__main__":
    main()