from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import gradio as gr
import os
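# Note: on LangChain >= 0.1 the LlamaCpp integration lives in
# `langchain_community` (`from langchain_community.llms import LlamaCpp`);
# the legacy import above is kept to match the original environment.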
# Model and generation settings, overridable via environment variables.
# (These are used by LlamaCpp below, so they must be defined, not commented out.)
MODEL_PATH = os.environ.get(
    "MODEL_PATH",
    "mostafaamiri/persian-llama-7b-GGUF-Q4/persian_llama_7b.Q8_K_M.gguf",
)
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.3"))
MAX_TOKENS = int(os.environ.get("MAX_TOKENS", "800"))
print("model: " + MODEL_PATH)
print("temp: " + str(TEMPERATURE))
print("max_tokens: " + str(MAX_TOKENS))
n_gpu_layers = 40  # Adjust based on your model and available GPU VRAM.
n_batch = 512  # Should be between 1 and n_ctx; consider your GPU's VRAM.
n_ctx = 2048  # Context window size in tokens.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
path = "mostafaamiri/persian-llama-7b-GGUF-Q4/persian_llama_7b.Q8_K_M.gguf"
# persian_llama_7b.Q4_K_M.gguf
# persian_llama_7b.Q8_K_M.gguf
# persian_llama_7b.f32.gguf
# Make sure the model path is correct for your system!
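# LlamaCpp needs a local .gguf file. If the model is not already on disk, one
# way (an assumption, not part of the original app) is to fetch it from the
# Hugging Face Hub first:
#   from huggingface_hub import hf_hub_download
#   MODEL_PATH = hf_hub_download(
#       repo_id="mostafaamiri/persian-llama-7b-GGUF-Q4",
#       filename="persian_llama_7b.Q8_K_M.gguf",
#   )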
llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=n_ctx,
    temperature=TEMPERATURE,
    max_tokens=MAX_TOKENS,
    top_p=1,
)
def generate_output(text):
    """Stream the model's response, yielding the growing text for the UI."""
    result = ""
    for s in llm.stream(text):
        result += s
        yield result

def clear():
    """Reset both the input and output textboxes."""
    return "", ""
# Feedback loggers (currently disabled). Enabling them requires `import json`;
# json.dumps escapes quotes and handles non-string values like TEMPERATURE.
# def like_log(input, output):
#     with open("like_log.txt", "a") as f:
#         f.write(json.dumps({"model": MODEL_PATH, "temperature": TEMPERATURE,
#                             "input": input, "output": output},
#                            ensure_ascii=False) + ",\n")
# def dislike_log(input, output):
#     with open("dislike_log.txt", "a") as f:
#         f.write(json.dumps({"model": MODEL_PATH, "temperature": TEMPERATURE,
#                             "input": input, "output": output},
#                            ensure_ascii=False) + ",\n")
# `dal_image` (logo markup shown above the header) is not defined anywhere in
# this file; an empty string is a safe placeholder if it is not provided elsewhere.
dal_image = ""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Header (Persian): "The Dal AI model" / "Contact us at info[@]aidal.ir"
    gr.Markdown(
        dal_image +
        """
<br>
<div dir="rtl">
<h1>
مدل هوش مصنوعی دال
</h1>
<p dir="rtl">
تماس با ما با
<br/>
info[@]aidal.ir
</p>
</div>
""")
    with gr.Row():
        inputs = gr.Textbox(label="ورودی", placeholder="سوال خود را وارد کنید", rtl=True)  # "Input" / "Enter your question"
    with gr.Row():
        submit_btn = gr.Button("ارسال", variant="primary")  # "Submit"
        clear_btn = gr.ClearButton(value="پاک کردن", variant="secondary")  # "Clear"
    with gr.Row():
        outputs = gr.Textbox(label="خروجی", rtl=True)  # "Output"
    submit_btn.click(fn=generate_output, inputs=[inputs], outputs=[outputs])
    clear_btn.click(fn=clear, inputs=[], outputs=[inputs, outputs])
    # Optional feedback buttons (disabled along with the loggers above):
    # with gr.Row():
    #     like_btn = gr.Button("👍🏾")
    #     dislike_btn = gr.Button("👎🏾")
    # like_btn.click(fn=like_log, inputs=[inputs, outputs], outputs=[])
    # dislike_btn.click(fn=dislike_log, inputs=[inputs, outputs], outputs=[])
# gr_interface = gr.Interface(fn=generate_output,
# inputs=gr.Textbox(label="ورودی",placeholder="سوال خود را وارد کنید",rtl=True),
# outputs=gr.Textbox(label="خروجی",rtl=True),
# live=False,
# flagging_options=["👍🏾","👎🏾"],
# concurrency_limit=5)
# Listen on all interfaces so the app is reachable inside a container;
# share=True additionally creates a public Gradio link.
demo.launch(server_name="0.0.0.0", share=True)
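# Note (depends on the Gradio version, which this file does not pin): on Gradio
# 3.x, streaming generator outputs require enabling the queue, e.g.
# `demo.queue().launch(...)`; Gradio 4 enables queueing by default.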