#!/usr/bin/env python3
"""
Simplified ChatGPT inference script for yes/no classification.
Forces single-token responses for consistent results.
"""
import os
from datetime import datetime
from multiprocessing.pool import ThreadPool

import pandas as pd
from openai import OpenAI

from app import format_prompt

# Initialize the OpenAI client; the API key is read from the environment
# instead of being hardcoded in the source.
OPENAI_API_KEY = os.environ["MICROSERVICES_FIVERR_OS_FIVERR_OS_BACKEND_CREDENTIALS_OPENAI_API_KEY"]
client = OpenAI(api_key=OPENAI_API_KEY)


def get_prediction(query, title, content, model="gpt-5-nano"):
    """Get a yes/no prediction from ChatGPT for a single (query, title, text) row."""
    prompt = format_prompt(query, title, content)
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": prompt}
            ],
        )
        # Normalize the model output to a lowercase token
        prediction = response.choices[0].message.content.strip().lower()
        # Anything other than an exact "yes"/"no" is treated as an error
        if prediction not in ('yes', 'no'):
            prediction = 'error'
        print(prediction)
        return prediction
    except Exception as e:
        print(f"API Error: {e}")
        return 'error'


def main():
    csv_path = "sampled_db.csv"

    # Load CSV
    print(f"Loading {csv_path}...")
    df = pd.read_csv(csv_path)

    # Run predictions for all rows in parallel (100 worker threads)
    args = [(str(row['query_text']), str(row['title']), str(row['text'])) for _, row in df.iterrows()]
    predictions = ThreadPool(100).starmap(get_prediction, args)
    df['prediction'] = predictions

    # Confusion matrix of true labels vs. predictions
    conf_matrix = pd.crosstab(
        index=df['label'],         # True labels
        columns=df['prediction'],  # Predicted labels
        rownames=['Actual'],
        colnames=['Predicted'],
    )

    # A prediction is correct if it says "yes" on a positive label
    # or "no" on a negative label; the mean of this mask is the accuracy.
    correct = (
        ((df['prediction'] == 'yes') & df['label'].isin(['easy_positive', 'hard_positive']))
        | ((df['prediction'] == 'no') & df['label'].isin(['easy_negative', 'hard_negative']))
    )
    print(conf_matrix)
    print(correct.mean())

    output = f"chatgpt_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    df.to_csv(output, index=False)
    print(f"\nSaved to: {output}")

    # Show summary
    print("\nResults:")
    print(df['prediction'].value_counts())


def make_sample_db():
    """Sample 100 rows per label from the full training set and save them to a timestamped CSV."""
    df = pd.read_csv("train_datasets_creation/full_train_dataset.csv")
    dfs = [df[df['label'] == label].sample(100) for label in df['label'].unique()]
    df = pd.concat(dfs).reset_index(drop=True)
    df.to_csv(f"sample_db_{datetime.now().isoformat()}.csv", index=False)


if __name__ == "__main__":
    make_sample_db()