"""Streamlit app: multimodal, multilingual document analyzer.

Accepts a PDF or an image upload, indexes it with LlamaIndex (FastEmbed
embeddings + Gemini LLM), answers direct questions about images with BLIP VQA,
and asks the LLM for an analysis in the user's selected language.
"""

import os
import torch
import PyPDF2
from io import BytesIO
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering
import streamlit as st
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.llms.gemini import Gemini

# Configure FastEmbed and Google Gemini
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.llm = Gemini(
    api_key=os.getenv("GOOGLE_API_KEY"),
    temperature=0.5,
    model_name="models/gemini-pro",
)

# Global variables to avoid reloading models
# NOTE(review): Streamlit re-executes the script on each interaction, so these
# module-level loads presumably run again on every rerun -- consider wrapping
# them in a cached loader if the installed Streamlit version provides one.
device = "cuda" if torch.cuda.is_available() else "cpu"
blip_vqa_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
blip_vqa_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(device)

# Where uploads are persisted so the directory reader can load them.
_UPLOAD_DIR = "./files/"
_PDF_UPLOAD_PATH = "./files/uploaded.pdf"
_IMAGE_UPLOAD_PATH = "./files/uploaded_image"
_IMAGE_MIME_TYPES = ("image/jpeg", "image/png")


def write_to_file(content, filename):
    """Write the bytes ``content`` to ``filename``, creating parent directories.

    Args:
        content: Bytes to write (the file is opened in binary mode).
        filename: Destination path. May be a bare file name with no directory.
    """
    parent = os.path.dirname(filename)
    # os.makedirs("") raises FileNotFoundError, so only create a parent
    # directory when the path actually has one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)


def answer_question_about_image(image, question):
    """Answer ``question`` about ``image`` with the BLIP VQA model.

    Args:
        image: Image passed to the BLIP processor (a PIL image at the call
            site in ``main``).
        question: Question text.

    Returns:
        The decoded answer string with special tokens removed.
    """
    inputs = blip_vqa_processor(image, question, return_tensors="pt").to(device)
    # Inference only: no gradients needed.
    with torch.no_grad():
        out = blip_vqa_model.generate(**inputs)
    return blip_vqa_processor.decode(out[0], skip_special_tokens=True)


def extract_text_and_images_from_pdf(pdf_file):
    """Extract the text and the raw embedded image streams from a PDF.

    Args:
        pdf_file: Object exposing ``getvalue()`` that returns the PDF bytes
            (e.g. a Streamlit uploaded file).

    Returns:
        A ``(text, images)`` tuple: ``text`` is the concatenated text of all
        pages and ``images`` is a list of the raw stream data of every image
        XObject found in the pages' resources.
    """
    pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_file.getvalue()))
    page_texts = []
    images = []
    for page in pdf_reader.pages:
        # Tolerate pages for which no text is returned.
        page_texts.append(page.extract_text() or "")

        # Use ``in`` + ``[]`` rather than chained ``.get()``: a page may have
        # no /Resources entry at all, and item access resolves indirect
        # references whereas dict.get() returns them unresolved.
        if "/Resources" not in page:
            continue
        resources = page["/Resources"]
        if "/XObject" not in resources:
            continue
        x_objects = resources["/XObject"]

        for name in x_objects:
            x_object = x_objects[name]
            if x_object.get("/Subtype") != "/Image":
                continue
            # Raw (still encoded) stream bytes, as in the original code.
            # NOTE(review): this relies on a private attribute; only streams
            # stored in a format PIL can read directly will be displayable.
            img_data = getattr(x_object, "_data", None)
            if img_data:
                images.append(img_data)
    return "".join(page_texts), images


def ingest_documents(input_files=None):
    """Load documents for indexing.

    Args:
        input_files: Optional list of file paths to load. When omitted, every
            file under ``./files/`` is loaded (the original behaviour).

    Returns:
        The documents returned by ``SimpleDirectoryReader.load_data()``.
    """
    if input_files:
        reader = SimpleDirectoryReader(input_files=list(input_files))
    else:
        reader = SimpleDirectoryReader(_UPLOAD_DIR)
    return reader.load_data()


def load_data(documents):
    """Build and return a ``VectorStoreIndex`` over ``documents``."""
    return VectorStoreIndex.from_documents(documents)


def generate_summary(index, document_text, query, target_language):
    """Ask the index's query engine for an analysis in ``target_language``.

    Args:
        index: Index exposing ``as_query_engine()``.
        document_text: Document content embedded into the prompt.
        query: The user's question, in any language.
        target_language: Language the response should be written in.

    Returns:
        The response text with surrounding whitespace stripped; an empty
        string if the engine returned no text.
    """
    query_engine = index.as_query_engine()
    response = query_engine.query(f"""
    You are a multilingual analyst and translator. Translate the query into English, analyze the document based on the translated query, and then translate the response back into {target_language}.

    Query: {query}
    Document: {document_text}

    Please cover the following aspects:
    1. Key insights related to the query
    2. Explanation based on the content of the document
    3. Any relevant comparisons or conclusions

    Provide a clear, concise, and professional response in {target_language}.
    """)
    # Guard against a missing response text instead of crashing on None.
    return (response.response or "").strip()


# Streamlit app
def main():
    """Render the Streamlit UI and run the upload -> question -> analysis flow."""
    st.title("Multimodal and Multilingual Document Analyzer")
    st.write("Upload a document (PDF, or image), ask questions in your preferred language, and get detailed analysis!")

    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "jpg", "png"])

    languages = {
        'English': 'en',
        'Hindi': 'hi',
        'Kannada': 'kn',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
    }
    selected_language = st.selectbox("Select your preferred language", list(languages.keys()))
    target_language = languages[selected_language]

    if uploaded_file is None:
        return

    file_type = uploaded_file.type
    st.write(f"Analyzing {file_type} file...")

    try:
        image = None
        if file_type == "application/pdf":
            document_text, images = extract_text_and_images_from_pdf(uploaded_file)
            write_to_file(uploaded_file.getvalue(), _PDF_UPLOAD_PATH)
            saved_path = _PDF_UPLOAD_PATH
            for img_data in images:
                # One embedded stream PIL cannot decode should not abort the
                # whole analysis; skip it and keep going.
                try:
                    embedded_image = Image.open(BytesIO(img_data))
                    st.image(embedded_image, use_column_width=True)
                except OSError:
                    continue
        elif file_type in _IMAGE_MIME_TYPES:
            image = Image.open(BytesIO(uploaded_file.getvalue()))
            document_text = ""
            st.image(image, use_column_width=True)
            write_to_file(uploaded_file.getvalue(), _IMAGE_UPLOAD_PATH)
            saved_path = _IMAGE_UPLOAD_PATH
        else:
            st.error("Unsupported file type")
            return

        query = st.text_input(f"Enter your query in {selected_language}", "")

        if st.button("Ask"):
            if not query:
                st.warning("Please enter a query.")
                return

            # Build the index only when a question is actually asked, and only
            # from the file just uploaded, so files left in ./files/ by earlier
            # uploads do not leak into this analysis.
            documents = ingest_documents([saved_path])
            index = load_data(documents)

            if file_type in _IMAGE_MIME_TYPES:
                answer = answer_question_about_image(image, query)
                st.write(f"**Direct Answer:** {answer}")
                summary = generate_summary(index, f"Image query: {query}\nAnswer: {answer}", query, target_language)
            else:
                summary = generate_summary(index, document_text, query, target_language)

            st.write("## Analysis")
            st.write(f"**Query:** {query}")
            st.write("## Summary")
            st.write(summary)

    except Exception as e:
        # Top-level UI boundary: surface the error to the user rather than
        # crashing the app.
        st.error(f"An error occurred: {str(e)}")
        st.write("Please try uploading the file again or try a different file.")


if __name__ == "__main__":
    main()