Staticaliza committed
Commit a3a72ca
1 Parent(s): 874afa2

Update app.py

Files changed (1):
  1. app.py +68 -14
app.py CHANGED
@@ -1,17 +1,71 @@
  import gradio as gr
- from transformers import pipeline
-
- def generate_text(prompt):
-     model_name = "TheBloke/openchat_3.5-GGUF"
-     generator = pipeline('text-generation', model=model_name)
-     return generator(prompt, max_length=50)[0]['generated_text']
-
- iface = gr.Interface(
-     fn=generate_text,
-     inputs=gr.inputs.Textbox(lines=2, placeholder="Enter your prompt here..."),
-     outputs="text",
-     title="GPT-3.5 Text Generation",
-     description="This is a demo for GPT-3.5 text generation model hosted on Hugging Face."
  )

- iface.launch()
  import gradio as gr
+ from gpt4all import GPT4All
+ from huggingface_hub import hf_hub_download
+
+ model_path = "models"
+ model_name = "openchat_3.5.Q4_K_M.gguf"
+ hf_hub_download(repo_id="TheBloke/openchat_3.5-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
+
+ model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
+
+ model.config["promptTemplate"] = "[INST] {0} [/INST]"
+ model.config["systemPrompt"] = ""
+ model._is_chat_session_activated = False
+
+ max_new_tokens = 2048
+
+ def generater(message, history, temperature, top_p, top_k):
+     prompt = "<s>"
+     for user_message, assistant_message in history:
+         prompt += model.config["promptTemplate"].format(user_message)
+         prompt += assistant_message + "</s>"
+     prompt += model.config["promptTemplate"].format(message)
+     outputs = []
+     for token in model.generate(prompt=prompt, temp=temperature, top_k=top_k, top_p=top_p, max_tokens=max_new_tokens, streaming=True):
+         outputs.append(token)
+         yield "".join(outputs)
+
+ chatbot = gr.Chatbot()
+
+ additional_inputs = [
+     gr.Slider(
+         label="temperature",
+         value=0.5,
+         minimum=0.0,
+         maximum=2.0,
+         step=0.05,
+         interactive=True,
+         info="Higher values like 0.8 make the output more random, while lower values like 0.2 make it more focused and deterministic.",
+     ),
+     gr.Slider(
+         label="top_p",
+         value=1.0,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.01,
+         interactive=True,
+         info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggested: leave at 1 and steer randomness with temperature; 1 means 100% and disables the filter.",
+     ),
+     gr.Slider(
+         label="top_k",
+         value=40,
+         minimum=0,
+         maximum=1000,
+         step=1,
+         interactive=True,
+         info="Limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
+     ),
+ ]
+
+ iface = gr.ChatInterface(
+     fn=generater,
+     title="AI Demo",
+     chatbot=chatbot,
+     additional_inputs=additional_inputs,
  )

+ with gr.Blocks() as demo:
+     iface.render()
+
+ if __name__ == "__main__":
+     demo.queue(max_size=3).launch()
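For reference, the string that `generater` streams to the model is plain Mistral/OpenChat-style `[INST]` formatting built from the chat history. Below is a minimal sketch of that assembly; `build_prompt` and the sample history are hypothetical names introduced here for illustration, not part of the commit:

```python
# Sketch of the prompt assembly inside generater, using the commit's
# promptTemplate "[INST] {0} [/INST]". build_prompt and the example
# history are illustrative only.
PROMPT_TEMPLATE = "[INST] {0} [/INST]"

def build_prompt(message, history):
    prompt = "<s>"                                       # BOS token
    for user_message, assistant_message in history:
        prompt += PROMPT_TEMPLATE.format(user_message)   # past user turn
        prompt += assistant_message + "</s>"             # past assistant turn, closed with EOS
    prompt += PROMPT_TEMPLATE.format(message)            # current turn, left open for the model
    return prompt

print(build_prompt("What is GGUF?", [("Hi", "Hello!")]))
# -> <s>[INST] Hi [/INST]Hello!</s>[INST] What is GGUF? [/INST]
```

Leaving the final `[/INST]` unanswered is what cues the model to produce the assistant reply; `generater` then accumulates the streamed tokens and yields the growing string so `gr.ChatInterface` can render the response incrementally.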