|
from openai import OpenAI |
|
import os |
|
import csv |
|
import pandas as pd |
|
|
|
# Load the sampled articles. The columns "title", "text" and "label" are the
# only ones used below.
df = pd.read_csv("sampled_data.csv")

# A missing title or body would make the concatenation below evaluate to NaN,
# and the row would later be sent to the model as the literal string "nan".
# Treat missing values as empty strings instead.
title = df["title"].fillna("")
body = df["text"].fillna("")

# Remove Reuters datelines such as "WASHINGTON (Reuters) -" (one or more
# all-caps words followed by the tag and a dash) and any remaining bare
# "(Reuters)" tag.
body = body.str.replace(
    r"(\b[A-Z]{2,}(?:\s[A-Z]{2,})*\s\(Reuters\)\s-|\(Reuters\))", "", regex=True
)

# Remove "Featured image via <credit>." credits: the shortest run ending in a
# period that is followed by whitespace or the end of the string.
body = body.str.replace(r"Featured image via .+?\.($|\s)", "", regex=True)

# The model sees the headline and the cleaned body as a single text.
df["text"] = title + " " + body

# Keep only the model input and the label column.
df = df[["text", "label"]]
|
|
|
# The API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# System message sent with every request. It asks for a one-word verdict:
# 'real' or 'fake'.
# NOTE(review): "educational guess" is presumably meant to be "educated
# guess"; the wording is kept verbatim because changing it would change the
# model input.
system_prompt = (
    "You are an expert in identifying fake news and disinformation. "
    "Please identify whether the piece of news is real or fake.\n"
    "Please think step-by-step as you answer the question. "
    "However, please only respond with 'real' if the news is real "
    "or 'fake' if the news is fake.\n"
    "Do not respond with any other words or phrases.\n"
    "If you are unsure if the news is real or fake, "
    "please still make an educational guess."
)
|
|
|
# Positional index of the first row to classify; rows before it are skipped.
START_INDEX = 961

total_rows = len(df)
texts = df["text"]

# Open the output file once for the whole run instead of once per row.
# Append mode keeps whatever an earlier run already wrote.
with open("inference_output.csv", "a", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)

    # In append mode the position starts at the end of the file, so a
    # position of 0 means the file is empty and still needs its header.
    if file.tell() == 0:
        writer.writerow(["Output", "Tokens Used", "Finish Reason"])
        file.flush()

    for i in range(START_INDEX, total_rows):
        response = client.chat.completions.create(
            model="gpt-4-1106-preview",
            max_tokens=10,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt,
                },
                {
                    "role": "user",
                    # Index the column directly rather than materialising the
                    # whole row first.
                    "content": str(texts.iloc[i]),
                },
            ],
        )

        choice = response.choices[0]
        output = choice.message.content
        tokens_used = response.usage.total_tokens
        finish_reason = choice.finish_reason

        writer.writerow([output, tokens_used, finish_reason])
        # Flush after every row so completed results are on disk even if a
        # later request raises.
        file.flush()

        # Periodic progress report.
        if i % 50 == 0:
            print(f"Batch: {i} / {total_rows}")
|
|