File size: 2,343 Bytes
583f59a
30bc075
 
583f59a
68d960f
11e1c78
 
 
34c1556
 
11e1c78
583f59a
3ee0221
11e1c78
3ee0221
11e1c78
 
3ee0221
 
11e1c78
 
 
3ee0221
 
11e1c78
 
 
 
 
 
b4a5839
 
 
f60fbd6
8dcc082
bbea512
 
 
b4a5839
 
3ee0221
 
 
908f3e3
b497264
3ee0221
11e1c78
3ee0221
 
 
 
e60d3bf
3ee0221
 
eee62a6
11e1c78
583f59a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import streamlit as st 
import os
import pathlib
import textwrap 
from PIL import Image

import google.generativeai as genai

# Read the Gemini API key from the process environment (None when the
# variable is unset) and hand it to the google.generativeai client.
api_key = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=api_key)

## Function to load the Gemini model and get responses
def get_gemini_response(input, image, prompt, model_name='gemini-pro-vision'):
    """Send an instruction, an image and an optional extra prompt to Gemini.

    Args:
        input: Instruction text placed first in the request. (The name
            shadows the ``input`` builtin; it is kept so existing keyword
            callers keep working.)
        image: Sequence whose first element is the image part to send; only
            ``image[0]`` is used.
        prompt: Additional text placed after the image. May be empty.
        model_name: Name of the generative model to instantiate. Defaults to
            the previously hard-coded ``'gemini-pro-vision'``.

    Returns:
        The ``text`` attribute of the generation response.

    Raises:
        IndexError: If ``image`` is an empty sequence.
    """
    model = genai.GenerativeModel(model_name)

    # Request order is preserved: instruction, image, then the extra prompt.
    candidate_parts = [input, image[0], prompt]

    # Drop text parts that carry no content (None or blank strings). The extra
    # prompt is optional in the UI, and an empty part adds nothing to the
    # request. NOTE(review): the API may also reject empty text parts - confirm.
    contents = [
        part
        for part in candidate_parts
        if part is not None and not (isinstance(part, str) and not part.strip())
    ]

    response = model.generate_content(contents)
    # NOTE(review): reading .text presumably fails when the response holds no
    # text candidate (e.g. a blocked request) - confirm against the SDK docs.
    return response.text

def input_image_setup(uploaded_file):
    """Package an uploaded file as a one-element list of image parts.

    Args:
        uploaded_file: Object exposing ``getvalue()`` (raw bytes) and a
            ``type`` attribute (MIME type), or ``None`` when nothing was
            uploaded.

    Returns:
        A list containing a single dict with the keys ``"mime_type"`` and
        ``"data"``.

    Raises:
        FileNotFoundError: If ``uploaded_file`` is ``None``.
    """
    # Guard clause: nothing to package without an upload.
    if uploaded_file is None:
        raise FileNotFoundError("No file uploaded")

    # Pull the raw bytes first, then pair them with the reported MIME type.
    raw_bytes = uploaded_file.getvalue()
    part = dict(mime_type=uploaded_file.type, data=raw_bytes)
    return [part]

# --- Page layout and user inputs -------------------------------------------
st.set_page_config(page_title="Gemini Image Demo")

st.header("Generative AI : Business Card Reader")
st.caption("""This space is based on Google generative ai API and it uses Gemini pro vision model
to extract text from business card images. You can use your own images for input
or find sample images in example folder of files section in this space.
You can add input prompt below if you want to get specific information from image.
You can modify this space for other input like invoice.""")

# Only common raster formats are accepted by the uploader.
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

# Preview the upload; `image` stays None until a file has been chosen.
image = None
if uploaded_file is not None:
    image = Image.open(uploaded_file)
    # NOTE(review): `use_column_width` may be deprecated in newer Streamlit
    # releases - confirm against the pinned version before changing it.
    st.image(image, caption="Uploaded Image.", use_column_width=True)

# Optional free-text prompt. The module-level name `input` shadows the builtin
# but is kept because the submit handler further down reads it.
input = st.text_input("Input Prompt (Optional) : ", key="input")

submit = st.button("Submit")

# Fixed instruction sent with every request. All labels use snake_case so the
# JSON keys the model is asked to produce are consistent.
input_prompt ="""
               You are an expert in understanding business cards.
               Input: Image of a business card.
               Task: Extract and label the following information in JSON format:
               Labels : person_name, company_name, occupation, contact_number, email_address, website, address, other_details (services, features, etc.)  
               Constraints: Do not include missing information.
               """

# Run the extraction when the user presses Submit.
if submit:
    if uploaded_file is None:
        # Without this guard input_image_setup raises FileNotFoundError and
        # the user sees a traceback instead of an actionable message.
        st.warning("Please upload a business card image before submitting.")
    else:
        image_data = input_image_setup(uploaded_file)
        # Argument order: fixed instruction, image parts, optional user prompt.
        response = get_gemini_response(input_prompt,image_data,input)
        st.subheader("Output :")
        st.write(response)