"""Streamlit app that extracts article text from an uploaded image.

The image is sent to the Google Cloud Vision document-text-detection endpoint
and the returned paragraphs are re-assembled into blocks of plain text.
"""

import io
import os

import openai  # NOTE(review): not used in the code visible here; kept in case it is needed elsewhere.
import streamlit as st
from dotenv import load_dotenv
from google.cloud import vision  # Google Cloud Vision
from openai import OpenAI  # NOTE(review): not used in the code visible here; kept in case it is needed elsewhere.
from PIL import Image

# Load environment variables (e.g. from a local .env file).
load_dotenv()

# Key file used only when GOOGLE_APPLICATION_CREDENTIALS is not already configured.
DEFAULT_SERVICE_ACCOUNT_FILE = r'gcv-new-project-dd6ed833cc91.json'

# Vertical distance between two paragraphs above which they are separated by
# an extra blank line. Units are those of the bounding-box coordinates
# (presumably image pixels -- confirm against the Vision API reference).
PARAGRAPH_GAP_THRESHOLD = 10

# Paragraphs with fewer words than this are dropped from the output.
MIN_PARAGRAPH_WORDS = 3

# Honour credentials configured in the environment / .env file and fall back
# to the bundled key file only when nothing is configured. Previously the
# configured value was read and then unconditionally overwritten.
service_account_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
if not service_account_path:
    service_account_path = DEFAULT_SERVICE_ACCOUNT_FILE
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_path

# Initialize the Google Vision client once; it is reused for every request.
vision_client = vision.ImageAnnotatorClient()


def _paragraph_text(paragraph) -> str:
    """Return the paragraph's words, each rebuilt from its symbols, joined by spaces."""
    words = ("".join(symbol.text for symbol in word.symbols) for word in paragraph.words)
    return " ".join(words).strip()


def _collect_block_lines(block, gap_threshold, min_words) -> list[str]:
    """Return the kept paragraphs of *block*, with "" marking large vertical gaps."""
    lines = []
    previous_bottom = None  # y of the previous paragraph's third vertex
    pending_gap = False  # a large gap was seen since the last kept paragraph

    for paragraph in block.paragraphs:
        text = _paragraph_text(paragraph)

        # Vertices 0 and 2 are treated as top-left and bottom-right, i.e. the
        # vertices are assumed to start top-left and run clockwise
        # (NOTE(review): may not hold for rotated text -- confirm).
        vertices = paragraph.bounding_box.vertices
        top = vertices[0].y
        if previous_bottom is not None and (top - previous_bottom) > gap_threshold:
            pending_gap = True
        previous_bottom = vertices[2].y

        if len(text.split()) < min_words:
            # Too short to keep; the gap it may have opened stays pending so
            # the next kept paragraph is still visually separated.
            continue

        # Emit the separator only between two kept paragraphs, so a block never
        # starts or ends with a blank line and never consists of blanks only.
        if pending_gap and lines:
            lines.append("")
        pending_gap = False
        lines.append(text)

    return lines


def extract_text_with_google_vision_api(
    image,
    *,
    gap_threshold=PARAGRAPH_GAP_THRESHOLD,
    min_words=MIN_PARAGRAPH_WORDS,
) -> str:
    """Extract structured text from an image using Google Cloud Vision.

    The image is serialized to bytes, sent to ``document_text_detection`` and
    the resulting pages/blocks/paragraphs are turned into plain text:
    paragraphs of one block are joined by newlines, blocks by blank lines.

    Args:
        image: A PIL image. It is encoded in its own ``format`` when that is
            set, otherwise as PNG.
        gap_threshold: Vertical distance between consecutive paragraphs of a
            block above which an extra empty line is inserted between them.
        min_words: Minimum number of whitespace-separated words a paragraph
            needs in order to be kept.

    Returns:
        The assembled text, or ``"No structured text found."`` when no
        paragraph passes the ``min_words`` filter.

    Raises:
        RuntimeError: If the Vision API response carries an error message.
    """
    buffer = io.BytesIO()
    # Images that were not loaded from a file have ``format`` set to None, and
    # saving to an in-memory buffer without an explicit format fails.
    image.save(buffer, format=image.format or "PNG")

    request_image = vision.Image(content=buffer.getvalue())
    response = vision_client.document_text_detection(image=request_image)

    # The API reports request-level failures inside the response object
    # rather than raising, so surface them instead of returning "no text".
    if response.error.message:
        raise RuntimeError(f"Google Cloud Vision API error: {response.error.message}")

    structured_texts = []
    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            block_lines = _collect_block_lines(block, gap_threshold, min_words)
            if block_lines:
                structured_texts.append("\n".join(block_lines))

    if structured_texts:
        return "\n\n".join(structured_texts)
    return "No structured text found."


def main():
    """Render the upload form, run text extraction and display the result."""
    st.title("Article Extraction")

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    try:
        image = Image.open(uploaded_file)
    except OSError as exc:  # includes PIL.UnidentifiedImageError
        st.error(f"Could not read the uploaded image: {exc}")
        return

    st.image(image, caption="Uploaded Image", use_column_width=True)

    # UI boundary: show any extraction failure to the user instead of
    # crashing the app or reporting a false success.
    try:
        extracted_text = extract_text_with_google_vision_api(image)
    except Exception as exc:
        st.error(f"Text extraction failed: {exc}")
        return

    st.success("Analysis completed successfully!")
    st.header("Extracted Text:")
    st.write(extracted_text if extracted_text else "No text detected.")


if __name__ == "__main__":
    main()