kimic
/

fake-news-detector-GPT-4-Turbo

Model card Files Files and versions Community

fake-news-detector-GPT-4-Turbo / inference.py

kimic's picture

Initial commit for GPT

bfe6b6c about 1 year ago

1.86 kB

	from openai import OpenAI
	import os
	import csv
	import pandas as pd

	# Load the sampled articles. The steps below read the "title", "text" and
	# "label" columns of this file.
	df = pd.read_csv("sampled_data.csv")

	# Remove Reuters datelines such as "WASHINGTON (Reuters) -" as well as any
	# bare "(Reuters)" marker from the article body.
	# The two alternatives must be separated by an unescaped `|`: an escaped
	# `\|` matches a literal pipe character, so the pattern would never fire on
	# ordinary article text.
	df["text"] = df["text"].str.replace(
	    r"(\b[A-Z]{2,}(?:\s[A-Z]{2,})*\s\(Reuters\)\s-|\(Reuters\))", "", regex=True
	)

	# Remove "Featured image via <credit>." sentences, whether the closing
	# period is followed by whitespace or ends the text (again an unescaped
	# `|` alternation between `$` and `\s`).
	df["text"] = df["text"].str.replace(r"Featured image via .+?\.($|\s)", "", regex=True)

	# Prepend the headline to the cleaned body so both are sent as one string.
	df["text"] = df["title"] + " " + df["text"]

	# Keep only the model input and the ground-truth label.
	df = df[["text", "label"]]

	# OpenAI API client. The key is read from the OPENAI_API_KEY environment
	# variable (os.environ.get yields None when the variable is unset).
	client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

	# System message used for every request: it asks the model for a one-word
	# verdict, 'real' or 'fake', and nothing else.
	# NOTE(review): "educational guess" presumably means "educated guess"; the
	# wording is runtime text sent to the model, so it is left untouched here.
	system_prompt = """You are an expert in identifying fake news and disinformation. Please identify whether the piece of news is real or fake.
	Please think step-by-step as you answer the question. However, please only respond with 'real' if the news is real or 'fake' if the news is fake.
	Do not respond with any other words or phrases.
	If you are unsure if the news is real or fake, please still make an educational guess."""

	# Classify the articles one request at a time, appending each verdict to
	# inference_output.csv as soon as it arrives.
	# NOTE(review): the loop starts at row 961, presumably the resume point of
	# an earlier interrupted run; rows before it are not processed.
	for i in range(961, len(df)):
	    # One system message with the instructions, one user message with the
	    # article text (coerced to str).
	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": str(df.iloc[i]["text"])},
	    ]
	    response = client.chat.completions.create(
	        model="gpt-4-1106-preview",
	        max_tokens=10,
	        messages=messages,
	    )

	    # Pull the verdict, token usage and stop reason out of the first choice.
	    choice = response.choices[0]
	    output = choice.message.content
	    tokens_used = response.usage.total_tokens
	    finish_reason = choice.finish_reason

	    # The file is reopened in append mode for every row, so each result is
	    # written out before the next request is made.
	    with open("inference_output.csv", "a", newline="", encoding="utf-8") as file:
	        writer = csv.writer(file)

	        # A position of 0 means the file is still empty: emit the header.
	        if not file.tell():
	            writer.writerow(["Output", "Tokens Used", "Finish Reason"])

	        writer.writerow([output, tokens_used, finish_reason])

	    # Progress report every 50th row index.
	    if i % 50 == 0:
	        print(f"Batch: {i} / {len(df)}")