```python
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
import gradio as gr

# Stream tokens to stdout as the model generates them
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Load the quantized Llama 2 chat model via llama.cpp
llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q3_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,  # verbose output is required to pass to the callback manager
)

# Wrap the user's question in the Llama 2 chat template, with the
# facts about Eve, Bob, and Alice supplied as the system message
def llama_llm(prompt):
    llama_prompt = (
        "<s>[INST] <<SYS>>\n"
        "Eve lives in Hamburg.; Bob lives in Cape Town.; Alice lives in Mumbai.\n"
        "<</SYS>>\n\n"
        f"{prompt} [/INST]"
    )
    response = llm.invoke(llama_prompt)
    return response
```
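Llama 2's chat models are trained on a specific template: each user turn is wrapped in `[INST] ... [/INST]`, and an optional system message sits inside `<<SYS>> ... <</SYS>>` at the start of the first turn. With the f-string above, the question "Where does Bob live?" renders as:

```text
<s>[INST] <<SYS>>
Eve lives in Hamburg.; Bob lives in Cape Town.; Alice lives in Mumbai.
<</SYS>>

Where does Bob live? [/INST]
```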
```python
# Expose llama_llm through a minimal text-in/text-out web UI
iface = gr.Interface(
    fn=llama_llm,
    inputs="text",
    outputs="text",
    title="Llama LLM Chat Interface",
    description="Ask a question based on the system prompt: "
                "'Eve lives in Hamburg.; Bob lives in Cape Town.; Alice lives in Mumbai.'",
)

# Launch the Gradio interface
iface.launch()
```
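Before serving the app, you can sanity-check the chain by calling the wrapper directly; this minimal sketch assumes the GGUF model file sits in the working directory:

```python
# Smoke test without the web UI, run from the directory containing the model file
answer = llama_llm("Where does Alice live?")
print(answer)  # should mention Mumbai, per the system prompt
```

By default `iface.launch()` serves the app locally on port 7860; passing `share=True` additionally creates a temporary public link.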