import gradio as gr
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


def predict(question):
    # Chain-of-thought style prompt: nudges the model to reason step by step.
    template = """Question: {question}

Answer: Let's think step by step."""
    prompt = PromptTemplate(template=template, input_variables=["question"])

    # Model served via the Hugging Face Hub Inference API.
    # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads
    # for some other options, e.g.:
    # repo_id = "google/flan-t5-xxl"
    # repo_id = "HuggingFaceH4/starchat-beta"
    # repo_id = "tiiuae/falcon-180B-chat"
    repo_id = "meta-llama/Llama-2-70b-chat-hf"

    # Generation parameters can be tuned via model_kwargs, e.g.:
    # llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 1024})
    # llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"min_length": 100, "max_new_tokens": 1024, "do_sample": True, "temperature": 0.1, "top_k": 50, "top_p": 0.95, "eos_token_id": 49155})
    llm = HuggingFaceHub(repo_id=repo_id)

    llm_chain = LLMChain(prompt=prompt, llm=llm)
    ai_response = str(llm_chain.run(question))
    # For models such as starchat-beta, trim everything after the end token:
    # ai_response = ai_response.partition("<|end|>")[0]
    return ai_response


gr.Interface(fn=predict, inputs="text", outputs="text").launch()
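
# Usage note (a sketch, assuming this file is saved as app.py): HuggingFaceHub
# authenticates against the Hugging Face Inference API via the
# HUGGINGFACEHUB_API_TOKEN environment variable (or an explicit
# huggingfacehub_api_token argument), so set a token before launching:
#
#   export HUGGINGFACEHUB_API_TOKEN=hf_...
#   python app.py
#
# Gradio then serves the interface locally (by default at http://127.0.0.1:7860).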