File size: 3,075 Bytes
9c1188f
 
4213f50
e3894fb
 
078637c
4213f50
6524289
 
4213f50
 
 
 
49ae654
078637c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49ae654
 
 
 
 
 
 
5dd2646
49ae654
 
 
e3894fb
5dd2646
 
49ae654
 
5dd2646
e3894fb
 
49ae654
 
 
4213f50
49ae654
 
 
 
9c1188f
078637c
 
ea62522
 
078637c
 
 
 
 
 
 
 
 
 
 
 
 
20ab8bc
078637c
 
 
 
9c1188f
ce41f2c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import random
import subprocess
import threading
import time

import gradio as gr

def generate_response(
    user_message,
    executable="/app/llama.cpp/main",
    model_path="/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
    max_tokens=400,
):
    """Run llama.cpp on *user_message* and stream its output.

    Yields the accumulated model output after every new stdout line,
    suffixed with the elapsed wall-clock time so the UI can show
    progress while the model is still generating.

    Args:
        user_message: Prompt text, passed to llama.cpp via ``-p``.
        executable: Path to the llama.cpp ``main`` binary
            (default: the container's install path).
        model_path: Path to the GGUF model file (``-m``).
        max_tokens: Number of tokens to generate (``-n``).

    Yields:
        str: All output so far plus an ``[Inference time: ...]`` marker.
    """
    cmd = [
        executable,
        "-m", model_path,
        "-p", user_message,
        "-n", str(max_tokens),
        "-e",  # process escape sequences (\n etc.) in the prompt
    ]

    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )

    # BUG FIX: previously stderr was only read after stdout was fully
    # consumed.  If the child writes enough to stderr while we block on
    # stdout, the stderr pipe fills and the child deadlocks.  Drain it
    # on a background thread and keep the text for error reporting.
    stderr_chunks = []
    drain = threading.Thread(
        target=lambda: stderr_chunks.append(process.stderr.read())
    )
    drain.start()

    start_time = time.time()
    alllines = ""

    # Yield each line of output as it becomes available
    for line in process.stdout:
        alllines += " " + line
        elapsed_time = time.time() - start_time  # Calculate elapsed time
        yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"

    # Wait for the subprocess to finish if it hasn't already
    process.wait()
    drain.join()

    # Check for any errors (stderr text was captured by the drain thread)
    if process.returncode != 0:
        error_message = stderr_chunks[0] if stderr_chunks else ""
        print(f"Error: {error_message}")

def custom_generate_response(user_message, builtinprompt):
    """Prefix *user_message* with a canned instruction and stream the reply.

    The previous implementation was a verbatim copy of
    ``generate_response``; delegate to it instead so the subprocess
    handling lives in exactly one place.

    Args:
        user_message: The user's input text.
        builtinprompt: Canned instruction prepended to the prompt
            (e.g. "Class Diagram for:").

    Yields:
        str: Accumulated model output with an elapsed-time suffix
        (same contract as ``generate_response``).
    """
    # Same prefix format as before: prompt, blank line, space, message.
    yield from generate_response(builtinprompt + '\n\n ' + user_message)


# Canned prompt prefixes for the one-click buttons below.
CustomPrompts = [
    "Class Diagram for:",
    "Pydot code for:",
]

with gr.Blocks() as iface:
    # Plain free-form chat interface backed by generate_response.
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test",
        description="No Message History for now - Enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )
    gr.HTML()
    MainOutput = gr.TextArea()
    CustomButtonInput = gr.TextArea()
    CustomButtonClassDiagram = gr.Button(CustomPrompts[0])
    CustomButtonPydotcode = gr.Button(CustomPrompts[1])
    # BUG FIX: Gradio event `inputs` must be components, not raw strings —
    # passing CustomPrompts[i] directly raises at launch.  Wrap each canned
    # prompt in gr.State so it reaches custom_generate_response as the
    # `builtinprompt` argument.
    CustomButtonClassDiagram.click(
        custom_generate_response,
        inputs=[CustomButtonInput, gr.State(CustomPrompts[0])],
        outputs=MainOutput,
    )
    CustomButtonPydotcode.click(
        custom_generate_response,
        inputs=[CustomButtonInput, gr.State(CustomPrompts[1])],
        outputs=MainOutput,
    )

# Bind to all interfaces so the app is reachable from outside the container.
iface.launch(server_name="0.0.0.0")  # share=True)