import os
from io import BytesIO

import requests
import streamlit as st
from PIL import Image, ImageDraw, ImageFont

from clarifai.client.model import Model
from clarifai.modules.css import ClarifaiStreamlitCSS

st.set_page_config(layout="wide")
ClarifaiStreamlitCSS.insert_default_css(st)

st.title("Data Labeling using General Object Detection Model and GPT-4 Vision")


def main():
    IMAGE_URL = st.text_input(
        "Paste an image URL below to get started",
        value="https://s3.amazonaws.com/samples.clarifai.com/black-car.jpg",
    )

    # Clarifai credentials
    with st.sidebar:
        st.subheader("Add your Clarifai PAT.")
        clarifai_pat = st.text_input("Clarifai PAT:", type="password")
        if not clarifai_pat:
            st.warning("Please enter your PAT to continue!", icon="⚠️")
            st.stop()
        else:
            os.environ["CLARIFAI_PAT"] = clarifai_pat

    # Detect object regions in the image with the general objectness detector
    detector_model = Model("https://clarifai.com/clarifai/main/models/objectness-detector")
    prediction_response = detector_model.predict_by_url(IMAGE_URL, input_type="image")

    # Since we sent one input, exactly one output will exist here
    regions = prediction_response.outputs[0].data.regions

    model_url = "https://clarifai.com/openai/chat-completion/models/openai-gpt-4-vision"
    classes = ["Ferrari 812", "Volkswagen Beetle", "BMW M5", "Honda Civic"]
    threshold = 0.99

    # Download the image so the detected boxes and labels can be drawn on it
    response = requests.get(IMAGE_URL)
    img = Image.open(BytesIO(response.content))
    draw = ImageDraw.Draw(img)

    for region in regions:
        # Bounding box values are returned as fractions of the image dimensions
        top_row = round(region.region_info.bounding_box.top_row, 3)
        left_col = round(region.region_info.bounding_box.left_col, 3)
        bottom_row = round(region.region_info.bounding_box.bottom_row, 3)
        right_col = round(region.region_info.bounding_box.right_col, 3)

        for concept in region.data.concepts:
            # Ask GPT-4 Vision to label the object inside the bounding box region
            prompt = (
                f"Label the Car in the Bounding Box region: "
                f"({top_row}, {left_col}), ({bottom_row}, {right_col}) with one word {classes}"
            )
            inference_params = dict(temperature=0.2, max_tokens=100, image_url=IMAGE_URL)

            # Model predict
            model_prediction = Model(model_url).predict_by_bytes(
                prompt.encode(), input_type="text", inference_params=inference_params
            )
            concept_name = model_prediction.outputs[0].data.text.raw

            # Only draw regions the detector is highly confident about
            value = round(concept.value, 4)
            if value > threshold:
                # Convert relative coordinates to pixel coordinates
                x0 = int(left_col * img.width)
                y0 = int(top_row * img.height)
                x1 = int(right_col * img.width)
                y1 = int(bottom_row * img.height)

                draw.rectangle([x0, y0, x1, y1], outline=(36, 255, 12), width=2)

                # Display the predicted label above the box
                font = ImageFont.load_default()
                draw.text((x0, y0 - 10), concept_name, font=font, fill=(36, 255, 12))

    st.image(img, caption="Image with label", use_column_width=True)


if __name__ == "__main__":
    main()