from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import GenerationConfig

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Choose the base model and the LoRA model. The trailing comments are
# presumably alternative checkpoints that can be swapped in.
base_model = "EleutherAI/polyglot-ko-5.8b" # "beomi/KoAlpaca-Polyglot-5.8B"
lora_weights = "KimSHine/YEOLLM_5.8B-lora_v3" # 'KimSHine/Scenario_Koalpaca_5.8B-lora'
# Passed as `load_in_8bit` when the base model is loaded.
load_8bit = True

# Tokenizer of the base model
tokenizer1 = AutoTokenizer.from_pretrained(base_model)
# Set the special-token ids explicitly.
# NOTE(review): 0 and 2 are hard-coded — confirm they match the vocabulary
# of whichever checkpoint `base_model` names.
tokenizer1.pad_token_id = 0
tokenizer1.eos_token_id = 2

"""### Base Model 불러오기"""

# Load the causal-LM checkpoint named by `base_model`.
# NOTE(review): this comment used to read "KoAlpaca-polyglot-5.8B", which is
# only accurate when `base_model` points at that checkpoint.

model1 = AutoModelForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
        )

# Use the same hard-coded pad/eos ids on the model config as on the tokenizer.
model1.config.pad_token_id = 0
model1.config.eos_token_id = 2

"""### LoRA Model 불러오기

Fine Tuning한 Model
"""

# Apply the adapter weights named by `lora_weights` to the already loaded
# base model; `model1` is rebound to the object `from_pretrained` returns.
model1 = PeftModel.from_pretrained(
    model1,
    lora_weights,
    torch_dtype=torch.float16,
)

# Set the special-token ids on the config reachable through the wrapped model.
model1.config.pad_token_id = 0  # annotated "unk" by the original author — TODO confirm against the vocab
model1.config.bos_token_id = 0
model1.config.eos_token_id = 2

# Generation settings that are identical for every script format.
_SHARED_GENERATION_SETTINGS = {
    'top_p': 0.95,
    'top_k': 40,
    'max_new_tokens': 2048,
    'no_repeat_ngram_size': 5,
    'do_sample': True,
    'num_beams': 5,
}

# One row per script format: (format name, instruction text, temperature).
# The instruction strings are sent to the model verbatim, so they are kept
# exactly as written.
# NOTE(review): "드마라" in the 현대드라마 row looks like a typo for "드라마", and
# the 뉴스 instruction starts with a space — confirm against the training
# prompts before touching either.
_FORMAT_ROWS = (
    (
        "다큐멘터리",
        "줄거리를 참고해서 다큐멘터리 형식의 대본을 만드시오. 다큐멘터리는 진지한 대화입니다. 같은 말을 반복하지 마세요.",
        0.65,
    ),
    (
        "인터뷰",
        "줄거리를 참고해서 인터뷰 형식의 대본을 만드시오. 인터뷰는 인터뷰어와 인터뷰이의 대화이며 인터뷰어가 질문을 하고 인터뷰이가 대답을 하는 형식입니다. 같은 말을 반복하지 마시오.",
        0.7,
    ),
    (
        "뉴스",
        " 줄거리를 참고해서 뉴스 형식으로 대본을 만드시오. 뉴스 형식의 대본은 앵커가 줄거리를 바탕으로 최대한 사실적인 내용을 생동감있게 설명하는 대본입니다. 뉴스는 앵커가 인사말과 본론, 마지막 인사말로 구성되어 있다. 같은 말을 반복하지 마시오.",
        0.7,
    ),
    (
        "현대드라마",
        "줄거리를 참고해서 드마라 형식으로 대본을 만드시오.",
        0.8,
    ),
    (
        "사극",
        "줄거리를 참고해서 드라마 형식으로 대본을 만드시오.",
        0.8,
    ),
)

# Maps a script-format name to its instruction text and generation settings.
# Every entry gets its own dict, so changing one format never affects another.
val_dict = {
    format_name: {
        'instruction': instruction_text,
        'temperature': sampling_temperature,
        **_SHARED_GENERATION_SETTINGS,
    }
    for format_name, instruction_text, sampling_temperature in _FORMAT_ROWS
}

def yeollm_text(selected_value, summary):
    """Generate a script from a synopsis with the LoRA fine-tuned model.

    Args:
        selected_value: Key of ``val_dict`` naming the script format.
        summary: Synopsis text that is inserted into the prompt.

    Returns:
        The text the model generated after the prompt, cut at a repeated
        ``### 대본:`` header if one appears, with a trailing
        ``<|endoftext|>`` marker and leading whitespace removed.

    Raises:
        KeyError: If ``selected_value`` is not a key of ``val_dict`` (for
            example ``None`` when nothing was selected in the UI).
    """
    if selected_value not in val_dict:
        raise KeyError(f"Unknown script format: {selected_value!r}")
    settings = val_dict[selected_value]

    # The prompt text, including its indentation, is sent to the model as-is.
    prompt = f"""아래는 작업을 설명하는 지시문과 대본을 생성하는데 참고할 줄거리입니다.\n
    ### 지시문:
    {settings['instruction']}
    ### 줄거리:
    {summary}
    ### 대본:
    """

    encoded = tokenizer1(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"].to(DEVICE)
    # Pass the mask explicitly instead of letting generate() guess it from
    # the pad token id.
    attention_mask = encoded["attention_mask"].to(DEVICE)

    generation_config = GenerationConfig(
        do_sample=settings['do_sample'],
        temperature=settings['temperature'],
        top_p=settings['top_p'],
        top_k=settings['top_k'],
        pad_token_id=0,   # same pad id as set on the tokenizer and model config
        no_repeat_ngram_size=settings['no_repeat_ngram_size'],
    )

    # Generate text. Per-step scores are not requested: nothing reads them
    # and they would be kept in memory for every generated token.
    with torch.no_grad():
        generation_output = model1.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            max_new_tokens=settings['max_new_tokens'],
        )

    # The returned sequence starts with the prompt tokens. Decoding only the
    # continuation keeps the result correct even when the synopsis itself
    # contains the "### 대본:" marker.
    sequence = generation_output.sequences[0]
    generated_tokens = sequence[input_ids.shape[-1]:]
    output = tokenizer1.decode(generated_tokens)

    # As before, stop at a repeated script header if the model emits one.
    output = output.split('### 대본:')[0]

    end_marker = '<|endoftext|>'
    if output.endswith(end_marker):
        output = output[:-len(end_marker)]
    return output.lstrip()

"""## text davinci 003 불러오기"""

import os

import openai

# Read the API key from the OPENAI_API_KEY environment variable so the
# secret does not have to be written into this file. Falls back to the
# previous empty default when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", '')
openai.api_key = OPENAI_API_KEY

# Engine name used by davinci_text; the trailing comment is an alternative.
model2 = 'text-davinci-003' #'gpt-3.5-turbo'
# Request settings read by the OpenAI-backed generators.
max_tokens = 2048
temperature = 0.3
Top_p = 1

def davinci_text(selected_value, summary):
    """Generate a script from a synopsis with the OpenAI completion engine.

    Args:
        selected_value: Key of ``val_dict`` naming the script format; the
            name itself is placed in the prompt.
        summary: Synopsis text that is inserted into the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.

    Raises:
        KeyError: If ``selected_value`` is not a key of ``val_dict``.
    """
    if selected_value not in val_dict:
        raise KeyError(f"Unknown script format: {selected_value!r}")

    # The sentence reads "make a script in <format> format", so the slot takes
    # the format name. Interpolating the full instruction sentence here
    # produced a duplicated, ungrammatical request.
    prompt = f"""
    줄거리를 참고해서 {selected_value} 형식의 대본을 만들어줘.
    ### 줄거리:
    {summary}
    ### 대본:
    """

    response = openai.Completion.create(
        engine=model2,
        prompt=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        n=1,
    )
    return response.choices[0].text.strip()

"""## gpt 3.5 turbo 불러오기"""

import os

import openai

# Read the API key from the OPENAI_API_KEY environment variable so the
# secret does not have to be written into this file. Falls back to the
# previous empty default when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", '')
openai.api_key = OPENAI_API_KEY

# Chat model name used by gpt_text.
model4 = 'gpt-3.5-turbo' #'gpt-3.5-turbo'
# Request settings read by the OpenAI-backed generators.
max_tokens = 2048
temperature = 0.3
Top_p = 1


def gpt_text(selected_value, summary):
    """Generate a script from a synopsis with the OpenAI chat model.

    Args:
        selected_value: Key of ``val_dict`` naming the script format; the
            name itself is placed in the prompt.
        summary: Synopsis text that is inserted into the prompt.

    Returns:
        The message content of the first choice, with leading whitespace
        removed.

    Raises:
        KeyError: If ``selected_value`` is not a key of ``val_dict``.
    """
    if selected_value not in val_dict:
        raise KeyError(f"Unknown script format: {selected_value!r}")

    # The sentence reads "make a script in <format> format", so the slot takes
    # the format name. Interpolating the full instruction sentence here
    # produced a duplicated, ungrammatical request.
    prompt = f"""
    ### 지시문:
    줄거리를 참고해서 {selected_value} 형식의 대본을 만들어줘.
    ### 줄거리:
    {summary}
    ### 대본:
    """
    response = openai.ChatCompletion.create(
        model=model4,
        messages=[
            {"role": "system", "content": "아래는 작업을 설명하는 지시문과 대본을 생성하는데 참고할 줄거리와 짝을 이루는 예제입니다. 요청을 적절히 만족하는 대본을 작성하세요."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        n=1,
    )
    # Exactly one choice is requested (n=1), so read it directly.
    content = response["choices"][0]["message"]["content"]
    return content.lstrip()

"""# gradio"""

import gradio as gr


def _build_interface(fn, output_label, title, description, summary_placeholder=None):
    """Create one Interface: format dropdown and synopsis box in, one text box out."""
    return gr.Interface(
        fn=fn,
        inputs=[
            # Choices come from val_dict so the dropdown cannot drift from
            # the formats the generator functions accept.
            gr.Dropdown(list(val_dict), label="형식"),
            gr.Textbox(label="Summary", placeholder=summary_placeholder),
        ],
        # Top-level components replace the deprecated gr.inputs / gr.outputs
        # namespaces, matching the gr.Dropdown usage above.
        outputs=gr.Textbox(label=output_label),
        title=title,
        description=description,
        theme="huggingface",
    )


generator1 = _build_interface(
    yeollm_text,
    output_label="Yeollm Scenario",
    title="Yeollm Scenario Generation",
    description="Generate scenarios using the Yeollm model.",
    summary_placeholder="대본으로 바꾸고 싶은 줄거리",
)

generator2 = _build_interface(
    davinci_text,
    output_label="Davinci Scenario",
    title="Davinci Generation",
    description="Generate scenarios using the Davinci model.",
)

generator3 = _build_interface(
    gpt_text,
    output_label="GPT Scenario",
    title="GPT Generation",
    description="Generate scenarios using the GPT model.",
)

# Combine the three interfaces with gr.Parallel and start the app.
gr.Parallel(generator1, generator2, generator3).launch()