# Gradio demo for the Dorna-Llama3-8B-Instruct GGUF model, served locally
# through llama.cpp (via LangChain) with token streaming.
from langchain.llms import LlamaCpp  # in LangChain >= 0.1 this lives in langchain_community.llms
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import gradio as gr

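# llama.cpp settings: n_gpu_layers is the number of transformer layers to
# offload to the GPU, n_batch is the prompt-processing batch size, and
# n_ctx is the context window size in tokens.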
n_gpu_layers = 40
n_batch = 512
n_ctx = 2048

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
path = "Dorna-Llama3-8B-Instruct-GGUF"  # LlamaCpp expects the path to a local GGUF model file
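
# Optional sketch (not in the original script): if the GGUF file is not already
# on disk, it could be fetched from the Hugging Face Hub; the repo id and
# filename below are assumptions to replace with the real ones.
# from huggingface_hub import hf_hub_download
# path = hf_hub_download(repo_id="PartAI/Dorna-Llama3-8B-Instruct-GGUF",
#                        filename="<quantized-model>.gguf")
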
llm = LlamaCpp(
    model_path=path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,  # echoes streamed tokens to stdout
    verbose=True,
    n_ctx=n_ctx,
    temperature=0.2,  # low temperature for focused, less random answers
    max_tokens=200,
    top_p=1,
)

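# Alpaca-style instruction prompt; the user's question is substituted for {}.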
prompt = """Below is an instruction that describes a task. |
|
Write a response that appropriately completes the request.\n\n |
|
### Instruction:\n\n{}\n\n\n### Response:\n\n\n""" |
|
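# Stream tokens from the model; yielding the accumulated text after each chunk
# lets Gradio refresh the output textbox incrementally.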
def generate_output(text):
    result = ""
    for s in llm.stream(prompt.format(text)):
        result += s
        yield result


def clear():
    # Reset both the input and output textboxes.
    return "", ""

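# Right-to-left Persian UI: input box, submit/clear buttons, streamed output box.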
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        # label: "Input"; placeholder: "Enter your question"
        inputs = gr.Textbox(label="ورودی", placeholder="سوال خود را وارد کنید", rtl=True)
    with gr.Row():
        submit_btn = gr.Button("ارسال", variant="primary")  # "Submit"
        clear_btn = gr.ClearButton(value="پاک کردن", variant="secondary")  # "Clear"
    with gr.Row():
        outputs = gr.Textbox(label="خروجی", rtl=True)  # label: "Output"

    submit_btn.click(fn=generate_output, inputs=[inputs], outputs=[outputs])
    clear_btn.click(fn=clear, inputs=[], outputs=[inputs, outputs])
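
# Bind to all network interfaces and also create a temporary public share link.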
demo.launch(server_name="0.0.0.0", share=True)
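
# To try it out (assuming the script is saved as app.py):
#   python app.py
# Gradio serves on port 7860 by default; share=True also prints a public URL.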