"""Classify news articles as 'real' or 'fake' with an OpenAI chat model.

Reads ``sampled_data.csv`` (columns used: ``title``, ``text``, ``label``),
strips source-revealing boilerplate from the article body, sends each
"title + body" string to the chat-completions endpoint and appends one row
per article (model output, total tokens, finish reason) to
``inference_output.csv``.
"""
import csv
import os

import pandas as pd
from openai import OpenAI

INPUT_PATH = "sampled_data.csv"
OUTPUT_PATH = "inference_output.csv"
MODEL = "gpt-4-1106-preview"
MAX_TOKENS = 10
# Positional row index at which processing starts; rows before it are skipped.
# NOTE(review): presumably a manual resume point from an earlier, interrupted
# run -- set to 0 to process the whole file.
START_INDEX = 961
PROGRESS_EVERY = 50

df = pd.read_csv(INPUT_PATH)

# Remove Reuters datelines such as "WASHINGTON (Reuters) -" as well as bare
# "(Reuters)" markers from the article body.
df["text"] = df["text"].str.replace(
    r"(\b[A-Z]{2,}(?:\s[A-Z]{2,})*\s\(Reuters\)\s-|\(Reuters\))", "", regex=True
)
# Remove "Featured image via ..." credit sentences.
df["text"] = df["text"].str.replace(r"Featured image via .+?\.($|\s)", "", regex=True)

# Prepend the title to the body. Missing values are replaced with "" first:
# string concatenation with NaN yields NaN, which would otherwise reach the
# model as the literal text "nan" and discard the half that was present.
df["text"] = df["title"].fillna("") + " " + df["text"].fillna("")
df = df[["text", "label"]]

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# NOTE(review): the wording is kept verbatim, including "educational guess"
# (presumably meant to be "educated guess"), so that the prompt sent to the
# model does not change between runs.
system_prompt = (
    "You are an expert in identifying fake news and disinformation. "
    "Please identify whether the piece of news is real or fake. "
    "Please think step-by-step as you answer the question. "
    "However, please only respond with 'real' if the news is real or 'fake' if the news is fake. "
    "Do not respond with any other words or phrases. "
    "If you are unsure if the news is real or fake, please still make an educational guess."
)

total_rows = len(df)

for i, article in enumerate(df["text"].iloc[START_INDEX:], start=START_INDEX):
    response = client.chat.completions.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": str(article)},
        ],
    )

    # Extract the fields that are persisted for this article.
    choice = response.choices[0]
    output = choice.message.content
    tokens_used = response.usage.total_tokens
    finish_reason = choice.finish_reason

    # The file is opened and closed once per article, so every result is
    # written out before the next request is made.
    with open(OUTPUT_PATH, "a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        # In append mode the position starts at the end of the file, so a
        # position of 0 means the file is empty and still needs a header.
        if file.tell() == 0:
            writer.writerow(["Output", "Tokens Used", "Finish Reason"])
        writer.writerow([output, tokens_used, finish_reason])

    if i % PROGRESS_EVERY == 0:
        print(f"Batch: {i} / {total_rows}")