# import gradio as gr
# from langchain.llms import LlamaCpp
# from langchain import PromptTemplate, LLMChain
# from langchain.llms import GPT4All
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# # import requests
# # url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin"
# # response = requests.get(url)
# # with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f:
# #     f.write(response.content)

# print("DONE")

# def func(user):
#     template = """
#     Your name is John; you are not an assistant but more like a chatbot. Respond precisely, in as few words as possible, and act like a human. For example: user: How are you? You: I'm doing good, how about you? user: hello You: Hello, how are you doing? Don't say "How can I assist you today?".
#     Question: {question}
#     Answer: """
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     local_path = "./nous-hermes-13b.ggmlv3.q4_0.bin"
#     # Callbacks support token-wise streaming
#     # callbacks = [StreamingStdOutCallbackHandler()]
#     # Verbose is required to pass to the callback manager
#     llm = LlamaCpp(model_path=local_path, n_ctx=2048)
#     llm_chain = LLMChain(prompt=prompt, llm=llm)
#     # Run the chain once and return the answer
#     return llm_chain.run(user)

# iface = gr.Interface(fn=func, inputs="text", outputs="text")
# iface.launch()

# import gradio as gr
# from langchain.llms import LlamaCpp
# from langchain import PromptTemplate, LLMChain
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# print("DONE")

# def func(user):
#     template = """
#     Your name is John; you are not an assistant but more like a chatbot. Respond precisely, in as few words as possible, and act like a human. For example: user: How are you? You: I'm doing good, how about you? user: hello You: Hello, how are you doing? Don't say "How can I assist you today?".
#     Question: {question}
#     Answer: """
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     local_path = "./nous-hermes-13b.ggmlv3.q4_0.bin"
#     # Streaming is configured on the LLM itself; LLMChain accepts no streaming kwarg
#     llm = LlamaCpp(model_path=local_path, streaming=True)
#     llm_chain = LLMChain(prompt=prompt, llm=llm)
#     return llm_chain.run(user)

# iface = gr.Interface(fn=func, inputs="text", outputs="text")
# iface.launch()

import gradio as gr
from gpt4allj import Model

# Load the local GPT4All-J model weights
model = Model('./ggml-gpt4all-j-v1.3-groovy.bin')

# Generate the model's response for a given prompt
def generate_response(prompt):
    response = model.generate(prompt)
    return response

# Create a Gradio interface with a text input and a text output box
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="GPT4All-J",
    description="Generate responses using the GPT4All-J model.",
)

# Run the Gradio interface
iface.launch()
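# The first commented-out version above fetched its GGML weights with
# requests. A minimal sketch of the same idea for this app's model file,
# kept commented out like the legacy code above; MODEL_URL is an assumption
# (the historical gpt4all.io download path) and should be verified before use:
#
# import os
# import requests
#
# MODEL_PATH = "./ggml-gpt4all-j-v1.3-groovy.bin"  # same file Model() loads above
# MODEL_URL = "https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin"  # assumed URL
#
# if not os.path.exists(MODEL_PATH):
#     with requests.get(MODEL_URL, stream=True) as r:
#         r.raise_for_status()
#         with open(MODEL_PATH, "wb") as f:
#             # Stream to disk in 1 MiB chunks instead of buffering the
#             # multi-gigabyte file in memory
#             for chunk in r.iter_content(chunk_size=1 << 20):
#                 f.write(chunk)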