"""Gradio demo: text generation with UltraLM-13B via llama-cpp-python.

Downloads the quantized GGML model from Hugging Face (once), loads it,
and serves a simple prompt -> completion web UI.
"""
import os

# HACK: installs the native dependency at startup (Hugging Face
# Space-style deployment). Prefer pinning llama-cpp-python in
# requirements.txt; kept here so the import below succeeds.
os.system("pip install llama-cpp-python")

import requests
from gradio import Interface, Slider, Textbox
from llama_cpp import Llama

MODEL_URL = (
    "https://huggingface.co/TheBloke/UltraLM-13B-GGML/resolve/main/"
    "ultralm-13b.ggmlv3.q4_0.bin"
)
MODEL_PATH = "ultralm-13b.ggmlv3.q4_0.bin"

# Download the model only if it is not already on disk, streaming it in
# chunks: the original `response.content` buffered the multi-GB file
# entirely in memory and re-downloaded it on every start.
if not os.path.exists(MODEL_PATH):
    with requests.get(MODEL_URL, stream=True, timeout=60) as response:
        response.raise_for_status()  # fail loudly on a bad/missing URL
        with open(MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)

llm = Llama(model_path=MODEL_PATH, n_ctx=2048)


def generate_text(prompt, temperature, max_length):
    """Generate a completion for *prompt* with the loaded UltraLM model.

    Args:
        prompt: User text inserted into the USER/ASSISTANT template.
        temperature: Sampling temperature (0-2 from the UI slider).
        max_length: Maximum number of tokens to generate.

    Returns:
        The generated text (prompt included, since ``echo=True``).
    """
    prompt_template = f'''USER: {prompt}
ASSISTANT:'''
    output = llm.create_completion(
        prompt_template,
        temperature=temperature,
        max_tokens=max_length,
        echo=True,
        stop=["USER:"],
    )
    print(output)
    # create_completion returns a dict; the UI output is a Textbox, so
    # return only the generated string, not the raw dict repr.
    return output["choices"][0]["text"]


Interface(
    fn=generate_text,
    inputs=[
        Textbox(type="text", lines=10),
        Slider(minimum=0, maximum=2, step=0.1, value=0.7),
        Slider(minimum=1, maximum=2048, step=2, value=256),
    ],
    outputs=Textbox(type="text", lines=20),
    title="UltraChat 13B Text Generation",
    description="Enter a prompt to generate text.",
).launch()