Spaces:
Sleeping
Sleeping
File size: 4,145 Bytes
8942d5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import streamlit as st
import os
from PIL import Image
import io
import base64
import requests
import json
from pathlib import Path
# Create the local assets folder at startup; a no-op when it already exists
os.makedirs("./assets", exist_ok=True)
# Function to call Groq API directly (avoiding the groq package)
def call_groq_api(image_base64, model, prompt, timeout=60):
    """Send one image plus a text prompt to Groq's chat-completions endpoint.

    The request is made with plain ``requests`` instead of the ``groq`` SDK.

    Args:
        image_base64: Base64 text of the image. It is embedded in a
            ``data:image/png`` URL, so callers should pass PNG-encoded data.
        model: Model identifier placed in the request payload.
        prompt: Text instruction sent alongside the image.
        timeout: Value forwarded to ``requests.post(timeout=...)`` so a
            stalled connection cannot block the app forever.

    Returns:
        A ``(content, error)`` tuple: ``(text, None)`` on success, or
        ``(None, message)`` when the API key is missing, the request fails,
        or the response does not have the expected shape.
    """
    api_key = os.environ.get("GROQ_API_KEY", "")
    if not api_key:
        return None, "Error: GROQ_API_KEY environment variable is not set."

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}",
                        },
                    },
                ],
            }
        ],
        "temperature": 0.1,
        "max_tokens": 1000,
    }

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"], None
    except (
        requests.RequestException,  # connection, timeout and HTTP status errors
        ValueError,  # body is not valid JSON
        KeyError,  # expected field missing from the response
        IndexError,  # empty "choices" list
        TypeError,  # response JSON is not shaped like a completion
    ) as e:
        return None, f"Error calling Groq API: {e}"
# Browser-tab title/icon and overall layout for the app
page_settings = {
    "page_title": "Llama-3-2-90b-vision-preview",
    "page_icon": "ποΈ",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)
# Header row: logo + title on the left, "Clear" button on the right
col1, col2 = st.columns([6, 1])
with col1:
    # Path.read_bytes() opens and closes the file itself, so no handle is
    # leaked the way a bare open(...).read() would leak one.
    try:
        logo_b64 = base64.b64encode(Path("img/llama.png").read_bytes()).decode()
    except OSError:
        # A missing or unreadable logo should not take the whole page down;
        # fall back to a text-only title.
        logo_b64 = None

    title_markup = " Llama-3-2-90b-vision-preview"
    if logo_b64 is not None:
        title_markup = (
            f'<img src="data:image/png;base64,{logo_b64}" width="50" '
            'style="vertical-align: -12px;">' + title_markup
        )
    # Kept flush-left inside the string: Markdown would render an indented
    # line as a code block.
    st.markdown("\n" + title_markup + "\n", unsafe_allow_html=True)
with col2:
    if st.button("Clear ποΈ"):
        # Drop any stored result, then rerun so the page redraws without it
        if "ocr_result" in st.session_state:
            del st.session_state["ocr_result"]
        st.rerun()
st.markdown("Extract structured text from images using Vision Models!", unsafe_allow_html=True)
st.markdown("---")
# Sidebar: image upload, model selection, and the extraction trigger
with st.sidebar:
    st.header("Upload Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

    # Model selection
    st.subheader("Model Settings")
    model = st.selectbox(
        "Select Vision Model",
        ["Llama-3-2-11b-vision-preview", "Llama-3-2-90b-vision-preview"],
        index=0,
    )

    if uploaded_file is not None:
        # Display the uploaded image
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image")

        if st.button("Extract Text π", type="primary"):
            with st.spinner("Processing image..."):
                img_str = None
                try:
                    # The PNG encoder cannot write every mode a JPEG may use
                    # (CMYK, for example), so anything outside the modes it
                    # accepts is converted to RGB before encoding.
                    png_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}
                    png_ready = image if image.mode in png_modes else image.convert("RGB")

                    # Re-encode as PNG: call_groq_api labels the payload image/png
                    buffered = io.BytesIO()
                    png_ready.save(buffered, format="PNG")
                    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
                except Exception as e:
                    # UI boundary: report the failure instead of a raw traceback
                    st.error(f"Error processing image: {str(e)}")

                if img_str is not None:
                    # Prepare the prompt
                    prompt = (
                        "Analyze the text in the provided image. Extract all readable content\n"
                        "and present it in a structured Markdown format that is clear, concise,\n"
                        "and well-organized. Ensure proper formatting (e.g., headings, lists, or\n"
                        "code blocks) as necessary to represent the content effectively."
                    )

                    # Call the API; it reports failures through the error slot
                    result, error = call_groq_api(img_str, model, prompt)
                    if error:
                        st.error(error)
                    else:
                        st.session_state["ocr_result"] = result
# Results pane: show a hint until an extraction result has been stored
if "ocr_result" not in st.session_state:
    st.info("Upload an image and click 'Extract Text' to see the results here.")
else:
    st.markdown(st.session_state["ocr_result"])

# Footer: separator rule followed by the attribution line
for footer_line in ("---", "Made using Vision Models via Groq API"):
    st.markdown(footer_line)
|