# (viewer/blame metadata captured with the source — commented out so the file parses)
# File size: 3,075 Bytes
# 9c1188f 4213f50 e3894fb 078637c 4213f50 6524289 4213f50 49ae654 078637c 49ae654 5dd2646 49ae654 e3894fb 5dd2646 49ae654 5dd2646 e3894fb 49ae654 4213f50 49ae654 9c1188f 078637c ea62522 078637c 20ab8bc 078637c 9c1188f ce41f2c
import gradio as gr
import random
import subprocess
import time
def generate_response(user_message):
    """Stream a llama.cpp completion for *user_message*.

    Launches the llama.cpp binary as a subprocess and, after each output
    line, yields everything received so far suffixed with the elapsed
    inference time so the UI can show live progress.

    Args:
        user_message: Prompt text passed to the model via ``-p``.

    Yields:
        str: Accumulated model output plus an ``[Inference time: ...]`` note.
    """
    # TODO: expose -n / sampling parameters and report RAM usage.
    cmd = [
        "/app/llama.cpp/main",  # Path to the executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",  # max tokens to generate
        "-e",         # process escape sequences in the prompt
    ]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    start_time = time.time()
    parts = []  # accumulate chunks; join per yield instead of quadratic +=
    for line in process.stdout:
        parts.append(" " + line)
        elapsed_time = time.time() - start_time  # elapsed time so far
        yield f"{''.join(parts)} [Inference time: {elapsed_time:.2f} seconds]"
    # Drain stderr BEFORE wait(): reading only after wait() can deadlock if
    # the child blocks on a full stderr pipe while we stream stdout.
    error_message = process.stderr.read()
    process.wait()
    if process.returncode != 0:
        print(f"Error: {error_message}")
def custom_generate_response(user_message, builtinprompt):
    """Stream a llama.cpp completion with *builtinprompt* prepended.

    Args:
        user_message: Text entered by the user.
        builtinprompt: Canned prefix (e.g. "Class Diagram for:") placed
            before the user's message, separated by a blank line.

    Yields:
        str: Same streamed output as :func:`generate_response`.
    """
    # Delegate instead of duplicating the 20-line subprocess-streaming
    # logic; the only difference was the prompt prefix.
    yield from generate_response(builtinprompt + '\n\n ' + user_message)
# Canned prompt prefixes wired to the two custom buttons in the UI below;
# each is prepended to the user's input by custom_generate_response().
CustomPrompts = [
    "Class Diagram for:",
    "Pydot code for:",
]
# Build the gradio UI: a plain chat interface plus two buttons that apply
# the canned prompt prefixes, then serve it on all interfaces.
with gr.Blocks() as iface:
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test",
        description="No Message History for now - Enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )
    gr.HTML()
    MainOutput = gr.TextArea()
    CustomButtonInput = gr.TextArea()
    CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
    CustomButtonPydotcode = gr.Button(CustomPrompts[1])
    # BUG FIX: .click() inputs must be gradio components, not raw Python
    # strings — wrap each canned prompt in gr.State so its value is passed
    # through to custom_generate_response.
    CustomButtonClassDiagram.click(
        custom_generate_response,
        inputs=[CustomButtonInput, gr.State(CustomPrompts[0])],
        outputs=MainOutput,
    )
    CustomButtonPydotcode.click(
        custom_generate_response,
        inputs=[CustomButtonInput, gr.State(CustomPrompts[1])],
        outputs=MainOutput,
    )

iface.launch(server_name="0.0.0.0")  # share=True for a public link