artificialguybr commited on
Commit
b58bfab
1 Parent(s): 894a386

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import gradio as gr
5
+ from threading import Thread
6
+

# Hugging Face Hub id of the long-form generation model served by this app.
MODEL = "THUDM/LongWriter-glm4-9b"

# Page title, rendered as raw HTML at the top of the Gradio app.
TITLE = "<h1><center>LongWriter-glm4-9b</center></h1>"

# HTML placeholder shown in the chatbot panel before any messages exist.
PLACEHOLDER = """
<center>
<p>Hi! I'm LongWriter-glm4-9b, capable of generating 10,000+ words. How can I assist you today?</p>
</center>
"""

# Custom CSS: styles the "Duplicate Space" button and centers h3 headings.
CSS = """
.duplicate-button {
margin: auto !important;
color: white !important;
background: black !important;
border-radius: 100vh !important;
}
h3 {
text-align: center;
}
"""

# NOTE(review): `device` is computed but never used below — placement is
# delegated to device_map="auto" in from_pretrained. Kept for compatibility.
device = "cuda" if torch.cuda.is_available() else "cpu"

# trust_remote_code=True is required: this GLM checkpoint ships its own
# modeling/tokenization code on the Hub (including the stream_chat method
# used later).
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
model = model.eval()  # inference mode: disables dropout / training-only behavior

def stream_chat(
    message: str,
    history: list,
    system_prompt: str,
    temperature: float = 0.5,
    max_new_tokens: int = 32768,
    top_p: float = 1.0,
    top_k: int = 50,
):
    """Stream a chat completion from the model, yielding partial responses.

    Generator used as the `fn` of gr.ChatInterface: each yielded string
    replaces the assistant message currently being rendered.

    Args:
        message: The new user message.
        history: Prior turns as a list of (user_prompt, assistant_answer) pairs.
        system_prompt: System prompt from the UI textbox.
        temperature: Sampling temperature (0-1 slider in the UI).
        max_new_tokens: Generation budget per reply.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.

    Yields:
        The progressively longer response text produced by model.stream_chat.
    """
    print(f'message: {message}')
    print(f'history: {history}')

    # Copy the history as (prompt, answer) tuples; a comprehension replaces
    # the original redundant append loop.
    chat_history = [(prompt, answer) for prompt, answer in history]

    # NOTE(review): `system_prompt` is collected from the UI but never passed
    # to the model — the remote-code stream_chat signature isn't visible here,
    # so it is left unplumbed rather than guessed at. TODO: confirm whether
    # glm4's stream_chat accepts a system role and forward it.
    for response, _ in model.stream_chat(
        tokenizer,
        message,
        chat_history,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
    ):
        yield response

# Chat display area; constructed outside the Blocks context so it stays
# unrendered until ChatInterface places it.
chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)

with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    # ChatInterface wires stream_chat's extra parameters (after message/history)
    # to the widgets listed in additional_inputs, in order: system_prompt,
    # temperature, max_new_tokens, top_p, top_k.
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful assistant capable of generating long-form content.",
                label="System Prompt",
            ),
            gr.Slider(
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.5,
                label="Temperature",
            ),
            gr.Slider(
                minimum=1024,
                maximum=32768,
                step=1024,
                value=32768,
                label="Max new tokens",
            ),
            gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=1.0,
                label="Top p",
            ),
            gr.Slider(
                minimum=1,
                maximum=100,
                step=1,
                value=50,
                label="Top k",
            ),
        ],
        # Canned long-form prompts shown beneath the chat input.
        examples=[
            ["Write a 10000-word comprehensive guide on artificial intelligence and its applications."],
            ["Create a detailed 5000-word business plan for a space tourism company."],
            ["Compose a 3000-word short story about time travel and its consequences."],
            ["Develop a 7000-word research proposal on the potential of quantum computing in cryptography."],
        ],
        # Examples would otherwise be pre-generated at startup — far too slow
        # for 10k-word outputs.
        cache_examples=False,
    )

# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()