import os

import requests
import gradio as gr
import torch
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizer,
    RobertaConfig,
)

HF_TOKEN = os.environ["HF_TOKEN"]

# Fetch the GPT-2 output detector checkpoint released by OpenAI.
os.system(
    "wget https://openaipublic.azureedge.net/gpt-2/detector-models/v1/detector-base.pt"
)

# Build a roberta-base classifier and load the detector weights into it.
# map_location="cpu" keeps the load from failing on CPU-only machines;
# the model is moved to the right device below.
config = RobertaConfig.from_pretrained("roberta-base")
model = RobertaForSequenceClassification(config)
model.load_state_dict(
    torch.load("detector-base.pt", map_location="cpu")["model_state_dict"]
)
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # disable dropout so inference is deterministic


def call_inference_api(query):
    """Score `query` with the detector hosted on the Hugging Face Inference API."""
    url = "https://api-inference.huggingface.co/models/roberta-base-openai-detector"
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    response = requests.post(url, json={"inputs": query}, headers=headers)
    if response.status_code == 200:
        # The API returns a list of {label, score} dicts for the two classes.
        fake, real = response.json()[0]
        fake_score = fake["score"]
        real_score = real["score"]
        return f"Fake: {fake_score:.2%} | Real: {real_score:.2%}"
    # Use .get() so a missing "error"/"warnings" key doesn't raise a KeyError.
    body = response.json()
    error = body.get("error")
    warning = body.get("warnings")
    return f"Error: {error} | Warning: {warning}"


def local_call(query):
    """Score `query` with the locally loaded detector."""
    # Adapted from
    # https://github.com/openai/gpt-2-output-dataset/tree/master/detector#L35-L46
    tokens = tokenizer.encode(query)
    all_tokens = len(tokens)
    # Truncate to leave room for the BOS and EOS tokens. `tokenizer.max_len`
    # is the transformers 2.0.0 attribute; newer releases call it
    # `model_max_length`.
    tokens = tokens[: tokenizer.max_len - 2]
    used_tokens = len(tokens)
    tokens = torch.tensor(
        [tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]
    ).unsqueeze(0)
    mask = torch.ones_like(tokens)

    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)

    # Index 0 is the "fake" probability, index 1 the "real" probability.
    fake, real = probs.detach().cpu().flatten().numpy().tolist()
    return (
        f"Fake: {fake:.2%} | Real: {real:.2%} | "
        f"Used tokens: {used_tokens} | All tokens: {all_tokens}"
    )


def main_function(query):
    hosted_output = call_inference_api(query)
    local_output = local_call(query)
    return hosted_output, local_output


text_input = gr.Textbox(
    lines=5,
    label=(
        "Enter text to compare output with the model hosted here: "
        "https://huggingface.co/roberta-base-openai-detector"
    ),
)
hosted_output = gr.Textbox(label="Output from model hosted on Hugging Face")
local_output = gr.Textbox(
    label=(
        "Output from model running locally on transformers 2.0.0, "
        "tokenizers 0.7.0, and torch 1.4.0"
    )
)

description = (
    "The original repository for the model used older versions of "
    "transformers, tokenizers, and torch, which produce slightly different "
    "scores than the model hosted on Hugging Face. This app compares the "
    "two models."
)

demo = gr.Interface(
    fn=main_function,
    inputs=text_input,  # use the labeled Textbox defined above (was "text")
    outputs=[hosted_output, local_output],
    title="Compare OpenAI detector models",
    description=description,
)

demo.launch()
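
# A minimal sketch of the dependency pins this Space's requirements.txt would
# need for the "local" path to match the old behavior. The three version pins
# come from the local_output label above; listing gradio and requests unpinned
# is an assumption.
#
#   transformers==2.0.0
#   tokenizers==0.7.0
#   torch==1.4.0
#   gradio
#   requests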