Spaces:
Sleeping
Sleeping
File size: 1,164 Bytes
1e3a3f5 48bad91 1e3a3f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
from pprint import pprint
import gradio as gr
# Set up token streaming to stdout, then load the local Llama 2 chat model.
callback_manager = CallbackManager(
    [StreamingStdOutCallbackHandler()],
)
llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q3_K_M.gguf",
    callback_manager=callback_manager,
    # Verbose has to be on for output to be passed to the callback manager.
    verbose=True,
    # Sampling / generation settings.
    temperature=0.75,
    top_p=1,
    max_tokens=2000,
)
# Define the function to interact with the LLM
def llama_llm(prompt: str) -> str:
    """Answer a user question with the module-level ``llm``.

    The question is wrapped in an ``[INST]`` / ``<<SYS>>`` template whose
    system section states where Eve, Bob and Alice live, and the resulting
    text is sent to the model.

    Args:
        prompt: The user's question as entered in the UI.

    Returns:
        The model's completion, or a short hint when ``prompt`` is empty or
        only whitespace (no model call is made in that case).
    """
    # Skip inference for blank input: there is nothing to answer.
    if not prompt or not prompt.strip():
        return "Please enter a question."

    llama_prompt = f"<s>[INST]<<SYS>>\nEve lives in Hamburg.; Bob lives in Cape Town.; Alice lives in Mumbay.\n<</SYS>>\n{prompt}[/INST]"
    # ``invoke`` is the supported entry point; calling the LLM object
    # directly (``llm(...)``) is deprecated in LangChain.
    return llm.invoke(llama_prompt)
# Build the web UI: a single text input routed through llama_llm to a
# single text output.
iface = gr.Interface(
    title="Llama LLM Chat Interface",
    description="Ask a question based on the system prompt: 'Eve lives in Hamburg.; Bob lives in Cape Town.; Alice lives in Mumbay.'",
    fn=llama_llm,
    inputs="text",
    outputs="text",
)

# Start serving the interface.
iface.launch()
|