import gradio as gr
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


def predict(question):
    # Chain-of-thought style prompt: nudges the model to reason step by step.
    template = """Question: {question}

Answer: Let's think step by step."""
    prompt = PromptTemplate(template=template, input_variables=["question"])

    # Model served via the Hugging Face Hub Inference API.
    # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads
    # for some other options, e.g.:
    # repo_id = "google/flan-t5-xxl"
    # repo_id = "HuggingFaceH4/starchat-beta"
    # repo_id = "tiiuae/falcon-180B-chat"
    repo_id = "meta-llama/Llama-2-70b-chat-hf"

    # Generation parameters can be tuned via model_kwargs, e.g.:
    # llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 1024})
    # llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"min_length": 100, "max_new_tokens": 1024, "do_sample": True, "temperature": 0.1, "top_k": 50, "top_p": 0.95, "eos_token_id": 49155})
    llm = HuggingFaceHub(repo_id=repo_id)

    llm_chain = LLMChain(prompt=prompt, llm=llm)
    ai_response = str(llm_chain.run(question))
    # For models such as starchat-beta, trim everything after the end token:
    # ai_response = ai_response.partition("<|end|>")[0]
    return ai_response


gr.Interface(fn=predict, inputs="text", outputs="text").launch()
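
# Usage note (a sketch, assuming this file is saved as app.py): HuggingFaceHub
# authenticates against the Hugging Face Inference API via the
# HUGGINGFACEHUB_API_TOKEN environment variable (or an explicit
# huggingfacehub_api_token argument), so set a token before launching:
#
#   export HUGGINGFACEHUB_API_TOKEN=hf_...
#   python app.py
#
# Gradio then serves the interface locally (by default at http://127.0.0.1:7860).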