from openai import OpenAI
import os
import csv
import pandas as pd

# Load the sampled articles. The steps below read the "title", "text" and
# "label" columns of this file.
df = pd.read_csv("sampled_data.csv")

# Remove Reuters attributions from the article body: either an upper-case
# dateline followed by "(Reuters) -" (one or more all-caps words of length
# >= 2), or a bare "(Reuters)" tag anywhere in the text.
df["text"] = df["text"].str.replace(
    r"(\b[A-Z]{2,}(?:\s[A-Z]{2,})*\s\(Reuters\)\s-|\(Reuters\))", "", regex=True
)

# Remove "Featured image via <source>." credits, up to and including the
# first period that is followed by whitespace or the end of the string.
df["text"] = df["text"].str.replace(r"Featured image via .+?\.($|\s)", "", regex=True)

# Prepend the headline to the body. Missing values are replaced with empty
# strings first: string concatenation with NaN yields NaN, which would
# otherwise turn the whole row into NaN (and later into the literal "nan").
df["text"] = df["title"].fillna("") + " " + df["text"].fillna("")

# Keep only the columns used downstream.
df = df[["text", "label"]]

# API client; the key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# NOTE(review): the prompt asks the model to "think step-by-step" while the
# request below caps the reply at 10 tokens, and "educational guess" is
# presumably meant to be "educated guess". The text is left untouched so that
# rows produced by this run stay comparable with rows already written.
system_prompt = """You are an expert in identifying fake news and disinformation. Please identify whether the piece of news is real or fake.
Please think step-by-step as you answer the question. However, please only respond with 'real' if the news is real or 'fake' if the news is fake.
Do not respond with any other words or phrases.
If you are unsure if the news is real or fake, please still make an educational guess."""

# Row position at which processing starts.
# NOTE(review): presumably this resumes an earlier run whose results are
# already in the output file - confirm before re-running, because rows are
# appended and a repeated run would duplicate them.
START_INDEX = 961
MODEL_NAME = "gpt-4-1106-preview"
MAX_COMPLETION_TOKENS = 10
OUTPUT_PATH = "inference_output.csv"
PROGRESS_INTERVAL = 50

total_rows = len(df)

# Open the output file once for the whole run instead of once per row. Rows
# are flushed individually so completed results survive an interruption.
with open(OUTPUT_PATH, "a", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)

    for i, text in enumerate(df["text"].iloc[START_INDEX:], start=START_INDEX):
        response = client.chat.completions.create(
            model=MODEL_NAME,
            max_tokens=MAX_COMPLETION_TOKENS,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt,
                },
                {
                    "role": "user",
                    "content": str(text),
                },
            ],
        )

        # Pull the reply text, token usage and stop reason from the response.
        choice = response.choices[0]
        output = choice.message.content
        tokens_used = response.usage.total_tokens
        finish_reason = choice.finish_reason

        # A position of 0 in append mode means the file is still empty, so
        # the header has not been written yet.
        if file.tell() == 0:
            writer.writerow(["Output", "Tokens Used", "Finish Reason"])

        writer.writerow([output, tokens_used, finish_reason])
        file.flush()

        if i % PROGRESS_INTERVAL == 0:
            print(f"Batch: {i} / {total_rows}")