File size: 4,145 Bytes
8942d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import os
from PIL import Image
import io
import base64
import requests
import json
from pathlib import Path

# Create the local "assets" folder (and any missing parents) at startup;
# an already-existing folder is left as it is.
Path("assets").mkdir(exist_ok=True, parents=True)

# Function to call Groq API directly (avoiding the groq package)
def call_groq_api(image_base64, model, prompt, timeout=60):
    """Send one image plus a text prompt to Groq's chat-completions endpoint.

    The request goes through ``requests`` against the REST endpoint, so the
    ``groq`` package is not required.

    Args:
        image_base64: Base64 text of the image. It is embedded in a
            ``data:image/png`` URL, so callers are expected to pass PNG data.
        model: Model identifier placed in the request's ``model`` field.
        prompt: Instruction text sent alongside the image.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        A ``(content, error)`` tuple. On success ``content`` is the first
        choice's message content and ``error`` is ``None``. On failure
        ``content`` is ``None`` and ``error`` is a displayable message.
    """
    api_key = os.environ.get("GROQ_API_KEY", "")

    if not api_key:
        return None, "Error: GROQ_API_KEY environment variable is not set."

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ],
            }
        ],
        "temperature": 0.1,
        "max_tokens": 1000,
    }

    # Transport-level failures: connection errors, timeouts, non-2xx status.
    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Error calling Groq API: {e}"

    # A 2xx reply can still have a body that is not JSON or lacks the
    # expected choices/message/content structure.
    try:
        return response.json()["choices"][0]["message"]["content"], None
    except (KeyError, IndexError, TypeError, ValueError) as e:
        return None, f"Error calling Groq API: unexpected response format ({e!r})"

# Page configuration
st.set_page_config(
    page_title="Llama-3-2-90b-vision-preview",
    # Eye emoji (U+1F441 followed by variation selector U+FE0F). Written as
    # escapes so the icon survives any re-encoding of this file.
    page_icon="\U0001F441\uFE0F",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Header row: title (with logo) on the left, clear button on the top right
col1, col2 = st.columns([6, 1])
with col1:
    try:
        # read_bytes() opens and closes the file itself, so no handle leaks.
        logo_b64 = base64.b64encode(Path("img/llama.png").read_bytes()).decode()
    except OSError:
        # A missing or unreadable logo should not take the whole page down.
        logo_b64 = None

    if logo_b64 is None:
        st.markdown("Llama-3-2-90b-vision-preview")
    else:
        st.markdown(
            f'<img src="data:image/png;base64,{logo_b64}" width="50" '
            'style="vertical-align: -12px;"> Llama-3-2-90b-vision-preview',
            unsafe_allow_html=True,
        )
with col2:
    # Label ends with the wastebasket emoji (U+1F5D1 + U+FE0F), written as
    # escapes so it survives any re-encoding of this file.
    if st.button("Clear \U0001F5D1\uFE0F"):
        # Drop the stored OCR output, then rerun so the page redraws empty.
        if "ocr_result" in st.session_state:
            del st.session_state["ocr_result"]
        st.rerun()

st.markdown("Extract structured text from images using Vision Models!", unsafe_allow_html=True)
st.markdown("---")

# Sidebar: image upload and model choice.
# Each widget is created through st.sidebar directly, which places it in
# the sidebar.
st.sidebar.header("Upload Image")
uploaded_file = st.sidebar.file_uploader(
    "Choose an image...",
    type=["png", "jpg", "jpeg"],
)

# Model selection
# NOTE(review): confirm these strings match the model IDs the Groq API accepts.
st.sidebar.subheader("Model Settings")
model = st.sidebar.selectbox(
    "Select Vision Model",
    ["Llama-3-2-11b-vision-preview", "Llama-3-2-90b-vision-preview"],
    index=0,
)

if uploaded_file is not None:
    # Open the upload inside a guard: a corrupt or mislabeled file makes
    # Image.open raise, which would otherwise abort the whole script run.
    try:
        image = Image.open(uploaded_file)
    except Exception as e:  # UI boundary: report the problem, don't crash
        image = None
        st.error(f"Error processing image: {str(e)}")

    if image is not None:
        # Display the uploaded image
        st.image(image, caption="Uploaded Image")

        # Label ends with the magnifying-glass emoji (U+1F50D), written as an
        # escape so it survives any re-encoding of this file.
        if st.button("Extract Text \U0001F50D", type="primary"):
            with st.spinner("Processing image..."):
                try:
                    # The API call labels the payload as PNG, so re-encode the
                    # upload as PNG. PNG cannot store every Pillow mode (for
                    # example CMYK JPEGs), so convert those to RGB first.
                    png_modes = ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA")
                    png_ready = image if image.mode in png_modes else image.convert("RGB")
                    buffered = io.BytesIO()
                    png_ready.save(buffered, format="PNG")
                    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

                    # Prepare the prompt (the trailing space before each line
                    # break is part of the original prompt text).
                    prompt = (
                        "Analyze the text in the provided image. Extract all readable content \n"
                        "and present it in a structured Markdown format that is clear, concise, \n"
                        "and well-organized. Ensure proper formatting (e.g., headings, lists, or \n"
                        "code blocks) as necessary to represent the content effectively."
                    )

                    # Call the API; it reports failures through the second
                    # tuple element.
                    result, error = call_groq_api(img_str, model, prompt)

                    if error:
                        st.error(error)
                    else:
                        # Keep the result across reruns so it stays visible.
                        st.session_state["ocr_result"] = result
                except Exception as e:  # UI boundary: report, don't crash
                    st.error(f"Error processing image: {str(e)}")

# Results area: show a hint until an extraction has been stored, then
# render the stored text as Markdown.
if "ocr_result" not in st.session_state:
    st.info("Upload an image and click 'Extract Text' to see the results here.")
else:
    st.markdown(st.session_state["ocr_result"])

# Footer: divider followed by the attribution line
st.markdown("---")
st.markdown("Made using Vision Models via Groq API")