|
import os
import sys
import time
from datetime import datetime, timezone

import openai
import pandas as pd
from dotenv import load_dotenv
|
|
|
|
|
# Load variables from the local ``key.env`` file, then read the OpenAI
# credential that is expected under the name ``cgpt_key``.
load_dotenv("key.env")
api_key = os.environ.get("cgpt_key")

# A single client instance is shared by every moderation request in this script.
client = openai.OpenAI(api_key=api_key)
|
|
|
|
|
# The input CSV path is the script's only required command-line argument.
# Exit with a usage hint instead of an IndexError traceback when it is omitted.
if len(sys.argv) < 2:
    sys.exit(f"Usage: {sys.argv[0]} <input_csv>")
INPUT_CSV = sys.argv[1]

# Moderation results are written to this file (relative to the working directory).
OUTPUT_CSV = "openai_SST_output.csv"
|
|
|
|
|
# Header of the results CSV, in the exact order the columns are written:
# the original input pair, the overall verdict, one column per moderation
# category, and finally the request bookkeeping fields.
OUTPUT_COLUMNS = (
    ["prompt", "response"]
    + ["Flagged"]
    + [
        "Hate",
        "Hate/Threatening",
        "Harassment",
        "Harassment/Threatening",
        "Self-Harm",
        "Self-Harm/Intent",
        "Self-Harm/Instructions",
        "Violence",
        "Violence/Graphic",
        "Sexual",
        "Sexual/Minors",
        "Illicit",
        "Illicit/Violent",
    ]
    + ["Processing_Time (s)", "Timestamp", "Model", "Response_ID"]
)
|
|
|
|
|
# When the results file does not exist yet, create it containing only the
# header row; data rows are appended to it later without a header.
_results_file_exists = os.path.exists(OUTPUT_CSV)
if not _results_file_exists:
    _header_only = pd.DataFrame(columns=OUTPUT_COLUMNS)
    _header_only.to_csv(OUTPUT_CSV, index=False)
|
|
|
|
|
# Output column name -> attribute name on the ``categories`` object of a
# moderation result. Order matters: it is the order the columns are emitted in.
_CATEGORY_ATTRIBUTES = {
    "Hate": "hate",
    "Hate/Threatening": "hate_threatening",
    "Harassment": "harassment",
    "Harassment/Threatening": "harassment_threatening",
    "Self-Harm": "self_harm",
    "Self-Harm/Intent": "self_harm_intent",
    "Self-Harm/Instructions": "self_harm_instructions",
    "Violence": "violence",
    "Violence/Graphic": "violence_graphic",
    "Sexual": "sexual",
    "Sexual/Minors": "sexual_minors",
    "Illicit": "illicit",
    "Illicit/Violent": "illicit_violent",
}


def moderate_text(text):
    """Send ``text`` to the OpenAI moderation endpoint and flatten the result.

    Args:
        text: The content to moderate. It is passed unchanged as the ``input``
            of the ``omni-moderation-latest`` request.

    Returns:
        A dict holding ``Flagged``, one entry per moderation category (taken
        from the first result's ``categories``), the request duration in
        seconds rounded to 4 decimals, a UTC timestamp formatted as
        ``YYYY-MM-DD HH:MM:SS``, and the response's ``model`` and ``id``.
        Returns ``None`` when ``text`` is a missing value or when the request
        or result extraction raises; the problem is printed in either case.
    """
    # pandas represents empty CSV cells as NaN (a float). There is nothing to
    # moderate in that case, so skip the request instead of sending it.
    if text is None or (isinstance(text, float) and pd.isna(text)):
        print(f"Error processing text: {text} -> missing value, nothing to moderate")
        return None

    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments during the request.
        started = time.perf_counter()
        response = client.moderations.create(input=text, model="omni-moderation-latest")
        elapsed = time.perf_counter() - started

        moderation_result = response.results[0]
        categories = moderation_result.categories

        record = {"Flagged": moderation_result.flagged}
        record.update(
            (column, getattr(categories, attribute))
            for column, attribute in _CATEGORY_ATTRIBUTES.items()
        )
        record.update(
            {
                "Processing_Time (s)": round(elapsed, 4),
                # Timezone-aware replacement for the deprecated utcnow();
                # the formatted string is unchanged.
                "Timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
                "Model": response.model,
                "Response_ID": response.id,
            }
        )
        return record
    except Exception as e:
        # Deliberately broad: one failing text is reported and skipped so the
        # caller can keep processing the remaining inputs.
        print(f"Error processing text: {text} -> {e}")
        return None
|
|
|
|
|
df = pd.read_csv(INPUT_CSV)

# Fail fast with a readable message rather than a KeyError on the first row.
missing_columns = [name for name in ("prompt", "response") if name not in df.columns]
if missing_columns:
    sys.exit(f"{INPUT_CSV} is missing required column(s): {', '.join(missing_columns)}")

total_rows = len(df)
failed_rows = 0

# Count positions with enumerate so the progress display does not depend on
# the DataFrame's index labels.
for position, (prompt, response) in enumerate(zip(df["prompt"], df["response"]), start=1):
    # Only the response text is moderated; the prompt is carried through to
    # the output unchanged.
    moderation_results = moderate_text(response)
    if not moderation_results:
        # moderate_text has already printed the reason for the failure.
        failed_rows += 1
        continue

    row_data = {"prompt": prompt, "response": response, **moderation_results}

    # Append each row as soon as it is ready so completed work is already on
    # disk if the run is interrupted. Pinning the column order keeps every
    # appended row aligned with the header.
    pd.DataFrame([row_data], columns=OUTPUT_COLUMNS).to_csv(
        OUTPUT_CSV, mode="a", header=False, index=False
    )

    print(f"Processed row {position}/{total_rows} - Flagged: {moderation_results['Flagged']} - Time: {moderation_results['Processing_Time (s)']}s")

if failed_rows:
    # Skipped rows are absent from the output file, so make that visible.
    print(f"{failed_rows} of {total_rows} rows could not be moderated and were left out of {OUTPUT_CSV}")

print(f"✅ Moderation complete. Results saved to {OUTPUT_CSV}")
|
|