# kimic's picture
# Initial commit for GPT
# bfe6b6c
from openai import OpenAI
import os
import csv
import pandas as pd
# Boilerplate patterns stripped from the article body before it is sent to the
# model: an optional all-caps prefix followed by "(Reuters) -" (or a bare
# "(Reuters)"), and a "Featured image via ..." sentence.
_REUTERS_TAG = r"(\b[A-Z]{2,}(?:\s[A-Z]{2,})*\s\(Reuters\)\s-|\(Reuters\))"
_IMAGE_CREDIT = r"Featured image via .+?\.($|\s)"

# Load the sample; the columns "title", "text" and "label" are read below.
df = pd.read_csv("sampled_data.csv")

# Apply the clean-up patterns one after another, in the order listed.
_body = df["text"]
for _pattern in (_REUTERS_TAG, _IMAGE_CREDIT):
    _body = _body.str.replace(_pattern, "", regex=True)

# The model input is the headline followed by the cleaned body.
df["text"] = df["title"] + " " + _body

# Only the model input and the label column are kept from here on.
df = df[["text", "label"]]
# API client; the key is read from the OPENAI_API_KEY environment variable
# (None is passed through when the variable is unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# System prompt sent with every request, assembled from its four lines.
# NOTE(review): "educational guess" is presumably meant to be "educated guess";
# the wording is left untouched so results stay comparable with earlier runs.
system_prompt = "\n".join(
    (
        "You are an expert in identifying fake news and disinformation. "
        "Please identify whether the piece of news is real or fake.",
        "Please think step-by-step as you answer the question. "
        "However, please only respond with 'real' if the news is real or 'fake' if the news is fake.",
        "Do not respond with any other words or phrases.",
        "If you are unsure if the news is real or fake, please still make an educational guess.",
    )
)
OUTPUT_PATH = "inference_output.csv"


def _count_saved_results(path):
    """Return the number of result rows (header excluded) already in *path*.

    A missing file counts as zero rows.
    """
    try:
        with open(path, newline="", encoding="utf-8") as saved:
            row_count = sum(1 for _ in csv.reader(saved))
    except FileNotFoundError:
        return 0
    # The first row is the header written when the file was created.
    return max(row_count - 1, 0)


# Resume right after the last saved result instead of relying on a hand-edited
# start index (previously hard-coded to 961, which skipped the first rows on a
# fresh run and had to be changed in the source after every interruption).
# Assumes the rows already in OUTPUT_PATH correspond, in order, to the rows of
# `df` starting at position 0 -- TODO confirm for files produced by older runs.
start_index = _count_saved_results(OUTPUT_PATH)
total = len(df)
texts = df["text"]

# Open the output once in append mode; each result is flushed as soon as it is
# written so an interrupted run keeps everything produced so far.
with open(OUTPUT_PATH, "a", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    # If the file is empty, write a header
    if file.tell() == 0:
        writer.writerow(["Output", "Tokens Used", "Finish Reason"])

    for i in range(start_index, total):
        response = client.chat.completions.create(
            model="gpt-4-1106-preview",
            # The prompt asks for a bare 'real' / 'fake' answer, so the reply
            # is capped at a handful of tokens.
            max_tokens=10,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": str(texts.iloc[i])},
            ],
        )

        # Extract the reply text, total token usage and finish reason.
        choice = response.choices[0]
        output = choice.message.content
        tokens_used = response.usage.total_tokens
        finish_reason = choice.finish_reason

        writer.writerow([output, tokens_used, finish_reason])
        file.flush()

        # Progress report every 50 rows.
        if i % 50 == 0:
            print(f"Batch: {i} / {total}")