invishalgupta committed on
Commit
a12ab3c
•
1 Parent(s): be7c45b

Create app.py

Files changed (1)
  1. app.py +136 -0
app.py ADDED
@@ -0,0 +1,136 @@
+ import gradio as gr
+ from llama_cpp import Llama
+ import datetime
+
+ # MODEL SETTINGS (also used for the display)
+ convHistory = ''
+ modelfile = "model/stablelm-zephyr-3b.Q4_K_M.gguf"
+ repetitionpenalty = 1.15
+ contextlength = 4096
+ logfile = 'StableZephyr3b_logs.txt'
+ print("loading model...")
+ stt = datetime.datetime.now()
+ # Set n_gpu_layers to the number of layers to offload to GPU; leave it unset (or 0) if no GPU acceleration is available on your system.
+ llm = Llama(
+     model_path=modelfile,  # download the model file first
+     n_ctx=contextlength,   # the max sequence length to use - longer sequences require much more resources
+     # n_threads=2,         # the number of CPU threads to use, tailor to your system and the resulting performance
+ )
+ dt = datetime.datetime.now() - stt
+ print(f"Model loaded in {dt}")
+
+ def writehistory(text):
+     # append one entry to the log file; the with-block closes the file on exit
+     with open(logfile, 'a') as f:
+         f.write(text)
+         f.write('\n')
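+ # The log file is created in the working directory and grows with every generation.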
+
+ """
+ Other built-in Gradio themes to try:
+ gr.themes.Base()
+ gr.themes.Default()
+ gr.themes.Glass()
+ gr.themes.Monochrome()
+ gr.themes.Soft()
+ """
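+ # e.g. pass one to the Blocks constructor below: gr.Blocks(theme=gr.themes.Soft())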
+ def combine(a, b, c, d, e, f):
+     # a: system prompt, b: user prompt, c: temperature, d: max new tokens,
+     # e: top_p, f: repetition penalty (the order matches the btn.click inputs below)
+     global convHistory
+     SYSTEM_PROMPT = f"""{a}
+
+
+     """
+     temperature = c
+     max_new_tokens = d
+     repeat_penalty = f
+     top_p = e
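+     # StableLM-Zephyr prompt template (per the model card); it has no system
+     # slot, which is why SYSTEM_PROMPT above is never inserted into the prompt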
+     prompt = f"<|user|>\n{b}<|endoftext|>\n<|assistant|>"
+     start = datetime.datetime.now()
+     generation = ""
+     delta = ""
+     prompt_tokens = f"Prompt Tokens: {len(llm.tokenize(bytes(prompt, encoding='utf-8')))}"
+     answer_tokens = ''
+     total_tokens = ''
+     for character in llm(prompt,
+                          max_tokens=max_new_tokens,
+                          stop=["</s>"],  # example stop string - not necessarily correct for this specific model (StableLM-Zephyr ends turns with <|endoftext|>), check before using
+                          temperature=temperature,
+                          repeat_penalty=repeat_penalty,
+                          top_p=top_p,
+                          echo=False,
+                          stream=True):
+         generation += character["choices"][0]["text"]
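+         # note: re-tokenizing the full generation on every streamed chunk below is
+         # quadratic in output length; acceptable for a demo UI, not for production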
+         answer_tokens = f"Out Tkns: {len(llm.tokenize(bytes(generation, encoding='utf-8')))}"
+         total_tokens = f"Total Tkns: {len(llm.tokenize(bytes(prompt, encoding='utf-8'))) + len(llm.tokenize(bytes(generation, encoding='utf-8')))}"
+         delta = datetime.datetime.now() - start
+         yield generation, delta, prompt_tokens, answer_tokens, total_tokens
+     timestamp = datetime.datetime.now()
+     logger = f"""time: {timestamp}\n Temp: {temperature} - MaxNewTokens: {max_new_tokens} - RepPenalty: {repeat_penalty} \nPROMPT: \n{prompt}\nStableZephyr3B: {generation}\nGenerated in {delta}\nPromptTokens: {prompt_tokens} Output Tokens: {answer_tokens} Total Tokens: {total_tokens}\n\n---\n\n"""
+     writehistory(logger)
+     convHistory = convHistory + prompt + "\n" + generation + "\n"
+     print(convHistory)
+     return generation, delta, prompt_tokens, answer_tokens, total_tokens
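+ # NOTE: combine is a generator, so Gradio streams each yield to the outputs; the
+ # value returned after the loop is not delivered to the UI.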
+
+
+ # MAIN GRADIO INTERFACE
+ with gr.Blocks(theme='Medguy/base2') as demo:  # theme=gr.themes.Glass()  # theme='remilia/Ghostly'
+     # TITLE SECTION
+     with gr.Row(variant='compact'):
+         with gr.Column(scale=3):
+             gr.Image(value='https://github.com/fabiomatricardi/GradioStudies/raw/main/20231205/logo-banner-StableZephyr.jpg',
+                      show_label=False,
+                      show_download_button=False, container=False)
+         with gr.Column(scale=10):
+             gr.HTML("<center>"
+                     + "<h3>Prompt Engineering Playground!</h3>"
+                     + "<h1>💎🦜 StableLM-Zephyr-3B - 4K context window</h1></center>")
+     with gr.Row():
+         with gr.Column(min_width=80):
+             gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)
+         with gr.Column(min_width=80):
+             prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)
+         with gr.Column(min_width=80):
+             outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)
+         with gr.Column(min_width=80):
+             totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)
+     # INTERACTIVE INFOGRAPHIC SECTION
+
+
+     # PLAYGROUND INTERFACE SECTION
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown(
+                 """
+                 ### Tuning Parameters""")
+             temp = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.01, value=0.42)
+             top_p = gr.Slider(label="Top_P", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
+             repPen = gr.Slider(label="Repetition Penalty", minimum=0.0, maximum=4.0, step=0.01, value=1.2)
+             max_len = gr.Slider(label="Maximum output length", minimum=10, maximum=(contextlength-500), step=2, value=900)
+             gr.Markdown(
+                 """
+                 Fill in the User Prompt,
+                 then click the button below
+                 """)
+             btn = gr.Button(value="💎🦜 Generate", variant='primary')
+             gr.Markdown(
+                 f"""
+                 - **Prompt Template**: StableLM-Zephyr 💎🦜
+                 - **Repetition Penalty**: {repetitionpenalty}
+                 - **Context Length**: {contextlength} tokens
+                 - **LLM Engine**: llama-cpp
+                 - **Model**: 💎🦜 StableLM-Zephyr-3B
+                 - **Log File**: {logfile}
+                 """)
+
+
+         with gr.Column(scale=4):
+             txt = gr.Textbox(label="System Prompt", value="", placeholder="This model does not use a system prompt...", lines=1, interactive=False)
+             txt_2 = gr.Textbox(label="User Prompt", lines=6, show_copy_button=True)
+             txt_3 = gr.Textbox(value="", label="Output", lines=12, show_copy_button=True)
+             btn.click(combine, inputs=[txt, txt_2, temp, max_len, top_p, repPen], outputs=[txt_3, gentime, prompttokens, outputokens, totaltokens])
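+             # the inputs list maps positionally onto combine(a, b, c, d, e, f),
+             # and the outputs list matches the 5-tuple that combine yields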
+
+
+ if __name__ == "__main__":
+     demo.launch(inbrowser=True)