"""Ask a GPT-4 vision model for entities visible in story images.

For each distinct item in ``./results.csv`` the script sends the item's story
text together with its three image URLs to the chat-completions endpoint and
prints the prompt, the image URLs and the model's answer.
"""

import json
from pprint import pprint

import pandas as pd
from openai import OpenAI

from secrets_key import OPENAI_KEY, RANDOM_SEED

client = OpenAI(api_key=OPENAI_KEY)

# Prompt template; ``{story}`` is filled in per row via ``str.format``.
# NOTE(review): the line breaks inside this literal could not be recovered
# from the collapsed source, so it is reproduced with the single spaces that
# were visible -- confirm against the original file if exact whitespace
# matters for the model.
prompt = (
    " You are given a story and 3 images related to the story. "
    "Identify a person/object that can be visually identified in the images "
    "but not directly mentioned on the story. "
    "Use as few words as possible to describe each person/object. "
    "Also, mention the image number (1, 2 or 3) where the person/object "
    "can be found. "
    "Output in a python list of dictionaries. "
    "Each dictionary should have the following keys: "
    "'image_number', 'person/object'. "
    "Story: {story} "
)


def _build_content(row, now_prompt):
    """Return ``(content, images)`` for one row: the message parts and URLs."""
    images = [row[f'Input.image{i}'] for i in range(1, 4)]
    content = [{"type": "text", "text": now_prompt}]
    content.extend(
        {"type": "image_url", "image_url": {"url": image_url}}
        for image_url in images
    )
    return content, images


def get_entity_gpt4V(row):
    """Query the vision model for one row and print the exchange.

    Args:
        row: Mapping-like record (e.g. a pandas row) providing the keys
            ``'Input.story'``, ``'Input.image1'`` .. ``'Input.image3'`` and
            ``'HITId'``.

    Returns:
        The text content of the first completion choice, as returned by the
        API. (The value is also printed; earlier versions returned nothing.)
    """
    now_prompt = prompt.format(story=row['Input.story'])
    content, images = _build_content(row, now_prompt)

    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        seed=RANDOM_SEED,
        messages=[
            {
                "role": "user",
                "content": content,
            },
        ],
        temperature=1,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    # Diagnostics are printed only after the request succeeded, as before.
    print(row['HITId'])
    print(now_prompt)
    pprint(images)
    out = response.choices[0].message.content
    print("OUTPUT:", out)
    print("====================================")
    print()
    return out


def main(csv_path='./results.csv', limit=10):
    """Run ``get_entity_gpt4V`` on the first ``limit`` distinct items.

    Args:
        csv_path: CSV file to read; must contain an ``'Input.item_id'``
            column plus the columns read by ``get_entity_gpt4V``.
        limit: Maximum number of distinct ``'Input.item_id'`` values to
            process before stopping.
    """
    df = pd.read_csv(csv_path)
    done = set()
    count = 0
    for _, row in df.iterrows():
        if count >= limit:
            break
        item_id = row['Input.item_id']
        # Several rows can share an item id; query each item only once.
        if item_id in done:
            continue
        done.add(item_id)
        get_entity_gpt4V(row)
        count += 1


if __name__ == '__main__':
    main()