import gradio as gr
from huggingface_hub import InferenceClient
import os
from openai import OpenAI
import pandas as pd
import numpy as np
import json
from typing import List, Dict
import time
from haystack.utils import Secret  # For securely storing the OpenAI API key

# Shared OpenAI client used by query_openai(); the API key is read from the
# OPENAI_KEY environment variable (None is passed through when it is unset).
client = OpenAI(api_key=os.environ.get('OPENAI_KEY'))


def query_openai(messages: List[Dict[str, str]],
                model: str = "gpt-4o-mini",
                temperature: float = 0.7) -> str:
    """Send a chat conversation to OpenAI and return the reply text.

    Args:
        messages: Chat messages, each a dict with "role" and "content" keys.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The content of the first choice in the completion. Any exception
        raised while calling the API or reading the reply is printed and
        ``None`` is returned instead, so callers must handle ``None``.
    """
    request_kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    try:
        completion = client.chat.completions.create(**request_kwargs)
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        # Best-effort: report the problem and signal failure with None.
        print(f"Error querying OpenAI: {exc}")
        return None

def evaluate_prompt(prompt_template: dict, review: str) -> dict:
    """Classify one review with the given prompt template.

    Args:
        prompt_template: Mapping with a 'system' message and a 'user'
            message. Every '{{REVIEW}}' placeholder in the user message is
            replaced with ``review``.
        review: Text of the review to classify.

    Returns:
        A dict with two keys:
        'predicted' - the stripped, lower-cased text inside the first
        <classification>...</classification> pair of the reply, or
        "unknown" when the reply has no such tag or no reply was obtained;
        'response' - the raw value returned by query_openai (``None`` when
        the request failed).
    """
    import re

    messages = [
        {"role": "system", "content": prompt_template['system']},
        {"role": "user", "content": prompt_template['user'].replace('{{REVIEW}}', review)}
    ]
    response = query_openai(messages)

    # query_openai returns None when the API call fails (or the reply has no
    # text); re.search would raise TypeError on it, so report "unknown".
    if response is None:
        return {
            'predicted': "unknown",
            'response': None
        }

    # Extract classification from XML tags
    classification_match = re.search(r'<classification>(.*?)</classification>', response, re.IGNORECASE)
    predicted = classification_match.group(1).strip().lower() if classification_match else "unknown"

    return {
        'predicted': predicted,
        'response': response
    }

# Prompt template passed to evaluate_prompt() by respond().
# evaluate_prompt() sends 'system' as the system message and 'user' as the
# user message after replacing the {{REVIEW}} placeholder with the review
# text; it then reads the label from the <classification> tags in the reply.
prompt_v3 = {
    # Persona given to the model.
    'system': """You are Roger Ebert and you are here to help us understand the sentiment of movie reviews.""",

    # Task instructions, the expected output format, two worked examples and
    # the {{REVIEW}} placeholder. The text is sent to the model as written,
    # including its indentation.
    'user': """Classify the following review as <classification>positive</classification> or <classification>negative</classification> (please stick to these labels), using a step-by-step analysis.

    Output Format:
    1. List key positive sentiment indicators
    2. List key negative sentiment indicators
    3. List anything that has a sentiment but is not relevant to the movie itself
    4. Provide reasoning for your classification decision
    5. End with classification in XML tags:
    <classification>positive</classification> or <classification>negative</classification>
    
    Example 1:
    
    Input: I loved this movie. However the main actor didn't fit the role. But the plot totally saved it.
    Output: <classification>positive</classification> 
    
    Example 2:
    
    Input: The movie was actually not THAT bad, especially plot-wise, but the doughy (and hairy!) actor they chose for the leading role was a little chintzy in the acting department. I would have chosen someone else. The idea of "going to America" was very ingenious, and the main character questioning everything that he'd ever known made him somewhat likable, but not very much so when there's a pasty blob for a leading actor.<br /><br />The storyline was interesting. It brings about the question of how the subject of cloning will be handled in the future. Certainly cloning wouldn't be allowed for the purposes in the movie, but it's still a valid argument even for today. Clones ARE still people... right?<br /><br />The movie wasn't particularly special, but it still is a lot better than some of the cheese released during the 70s. Let us not forget the "Giant Spider Invasion." I give it a 4, since it didn't TOTALLY stink, but the MST3K version makes this movie a 10. (I still like Dr. Super Mario!) You'll like this movie, but it won't be your favorite.
    Output: <classification>negative</classification>
    
    Review to classify: {{REVIEW}}"""
}
    
# def respond(
#     message,
#     system_message,
#     max_tokens,
#     temperature,
#     top_p,
# ):
#     messages = [{"role": "system", "content": system_message}]

#     messages.append({"role": "user", "content": message})

#     response = ""

#     for message in client.chat_completion(
#         messages,
#         max_tokens=max_tokens,
#         stream=True,
#         temperature=temperature,
#         top_p=top_p,
#     ):
#         token = message.choices[0].delta.content

#         response += token
#         yield response

def respond(review):
    """Return the predicted sentiment label for a movie review.

    Runs the review through evaluate_prompt() with the prompt_v3 template
    and hands back only the 'predicted' entry of its result.
    """
    return evaluate_prompt(prompt_v3, review)['predicted']

# Gradio UI: one text box for the review, one text box for the prediction,
# with respond() as the callback that maps the former to the latter.
demo = gr.Interface(
    fn=respond,
    inputs=[gr.Textbox(label="Movie review")],
    # additional_inputs=[
        # gr.Textbox(value="You are a movie review classifier. You respond to given movie reviews with a predicted star rating 0-5 (inclusive) for that review with no explanation.", label="System message"),
        # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        # gr.Slider(
        #     minimum=0.1,
        #     maximum=1.0,
        #     value=0.95,
        #     step=0.05,
        #     label="Top-p (nucleus sampling)",
        # ),
    # ],
    outputs=[gr.Textbox(label="Predicted Sentiment")],
)


# Launch the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()