import streamlit as st
import os
from PIL import Image
import io
import base64
import requests
import json
from pathlib import Path

# Ensure assets directory exists
Path("./assets").mkdir(parents=True, exist_ok=True)
# Function to call Groq API directly (avoiding the groq package)
def call_groq_api(image_base64, model, prompt):
    api_key = os.environ.get("GROQ_API_KEY", "")
    if not api_key:
        return None, "Error: GROQ_API_KEY environment variable is not set."

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
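
    # Build an OpenAI-compatible chat-completions request; the image is passed
    # inline as a base64-encoded data URL rather than a hosted image URL.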
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        }
                    }
                ]
            }
        ],
        "temperature": 0.1,
        "max_tokens": 1000
    }
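
    # POST to Groq's OpenAI-compatible endpoint; network or HTTP errors are
    # returned to the caller as an error string instead of raising.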
    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60  # avoid hanging the Streamlit worker on a stalled request
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"], None
    except Exception as e:
        return None, f"Error calling Groq API: {str(e)}"
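
# call_groq_api returns a (content, error) tuple: exactly one of the two is None,
# so callers can branch on the error value (see the Extract Text handler below).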
# Page configuration
st.set_page_config(
    page_title="Llama-3-2-90b-vision-preview",
    page_icon="🗂️",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Add clear button to top right
col1, col2 = st.columns([6, 1])
with col1:
    st.markdown("""
<img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Llama-3-2-90b-vision-preview
""".format(base64.b64encode(open("img/llama.png", "rb").read()).decode()), unsafe_allow_html=True)
with col2:
    if st.button("Clear 🗑️"):
        if "ocr_result" in st.session_state:
            del st.session_state["ocr_result"]
        st.rerun()

st.markdown("Extract structured text from images using Vision Models!", unsafe_allow_html=True)
st.markdown("---")
# Move upload controls to sidebar
with st.sidebar:
    st.header("Upload Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

    # Model selection (Groq model IDs)
    st.subheader("Model Settings")
    model = st.selectbox(
        "Select Vision Model",
        ["llama-3.2-11b-vision-preview", "llama-3.2-90b-vision-preview"],
        index=0
    )
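
    # The selected model ID is passed straight through to call_groq_api as the
    # "model" field of the request payload.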
    if uploaded_file is not None:
        # Display the uploaded image
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image")

        if st.button("Extract Text 🔍", type="primary"):
            with st.spinner("Processing image..."):
                try:
                    # Convert image for API
                    buffered = io.BytesIO()
                    image.save(buffered, format="PNG")
                    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

                    # Prepare the prompt
                    prompt = """Analyze the text in the provided image. Extract all readable content
                    and present it in a structured Markdown format that is clear, concise,
                    and well-organized. Ensure proper formatting (e.g., headings, lists, or
                    code blocks) as necessary to represent the content effectively."""

                    # Call the API
                    result, error = call_groq_api(img_str, model, prompt)

                    if error:
                        st.error(error)
                    else:
                        st.session_state["ocr_result"] = result
                except Exception as e:
                    st.error(f"Error processing image: {str(e)}")
# Main content area for results
if "ocr_result" in st.session_state:
    st.markdown(st.session_state["ocr_result"])
else:
    st.info("Upload an image and click 'Extract Text' to see the results here.")

# Footer
st.markdown("---")
st.markdown("Made using Vision Models via Groq API")