uaritm committed on
Commit df692e5
1 Parent(s): a23d40f

Delete app

Files changed (1)
  1. app +0 -142
app DELETED
@@ -1,142 +0,0 @@
- import gradio as gr
- from llama_cpp import Llama
- import datetime
- import os
- from huggingface_hub import hf_hub_download
-
- # MODEL SETTINGS (also shown in the UI)
- convHistory = ''
- modelfile = hf_hub_download(
-     repo_id=os.environ.get("REPO_ID", "TheBloke/stablelm-zephyr-3b-GGUF"),
-     filename=os.environ.get("MODEL_FILE", "stablelm-zephyr-3b.Q4_K_M.gguf"),
- )
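- # The defaults above can be overridden via environment variables without code changes,
- # e.g. a hypothetical alternative quantization (filename not verified against the repo):
- #   REPO_ID=TheBloke/stablelm-zephyr-3b-GGUF MODEL_FILE=stablelm-zephyr-3b.Q5_K_M.gguf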
- repetitionpenalty = 1.15
- contextlength = 4096
- logfile = 'StableZephyr3b_logs.txt'
- print("loading model...")
- stt = datetime.datetime.now()
- # Set n_gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
- llm = Llama(
-     model_path=modelfile,   # path to the GGUF file downloaded above
-     n_ctx=contextlength,    # the max sequence length to use - longer sequence lengths require much more resources
-     # n_threads=2,          # the number of CPU threads to use, tailor to your system and the resulting performance
- )
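- # A hypothetical GPU variant (assumes llama-cpp-python was built with CUDA/Metal support):
- #   llm = Llama(model_path=modelfile, n_ctx=contextlength, n_gpu_layers=-1)
- # n_gpu_layers=-1 offloads all layers; this app keeps the CPU-only default.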
- dt = datetime.datetime.now() - stt
- print(f"Model loaded in {dt}")
-
- def writehistory(text):
-     # append one log record per generation to the local logfile
-     with open(logfile, 'a') as f:
-         f.write(text)
-         f.write('\n')
-
- # Available Gradio themes, for reference:
- #   gr.themes.Base()
- #   gr.themes.Default()
- #   gr.themes.Glass()
- #   gr.themes.Monochrome()
- #   gr.themes.Soft()
- def combine(a, b, c, d, e, f):
-     # inputs arrive positionally from Gradio: a = system prompt, b = user prompt,
-     # c = temperature, d = max new tokens, e = top_p, f = repeat penalty
-     global convHistory
-     # assembled but never used - this model has no system prompt slot
-     SYSTEM_PROMPT = f"""{a}
-
-
-     """
-     temperature = c
-     max_new_tokens = d
-     repeat_penalty = f
-     top_p = e
-     # StableLM-Zephyr chat template
-     prompt = f"<|user|>\n{b}<|endoftext|>\n<|assistant|>"
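-     # For example, with b = "Hello" the rendered prompt is:
-     #   <|user|>
-     #   Hello<|endoftext|>
-     #   <|assistant|>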
-     start = datetime.datetime.now()
-     generation = ""
-     delta = ""
-     prompt_tokens = f"Prompt Tokens: {len(llm.tokenize(bytes(prompt, encoding='utf-8')))}"
-     answer_tokens = ''
-     total_tokens = ''
-     for character in llm(prompt,
-                          max_tokens=max_new_tokens,
-                          # the template above ends each turn with <|endoftext|>, so stop on it;
-                          # check these stop tokens before reusing this with another model
-                          stop=["</s>", "<|endoftext|>"],
-                          temperature=temperature,
-                          repeat_penalty=repeat_penalty,
-                          top_p=top_p,
-                          echo=False,
-                          stream=True):
-         # each streamed chunk carries one text fragment in choices[0]["text"]
-         generation += character["choices"][0]["text"]
-
-         answer_tokens = f"Out Tkns: {len(llm.tokenize(bytes(generation, encoding='utf-8')))}"
-         total_tokens = f"Total Tkns: {len(llm.tokenize(bytes(prompt, encoding='utf-8'))) + len(llm.tokenize(bytes(generation, encoding='utf-8')))}"
-         delta = datetime.datetime.now() - start
-         yield generation, delta, prompt_tokens, answer_tokens, total_tokens
-     timestamp = datetime.datetime.now()
-     logger = f"""time: {timestamp}\n Temp: {temperature} - MaxNewTokens: {max_new_tokens} - RepPenalty: {repeat_penalty} \nPROMPT: \n{prompt}\nStableZephyr3B: {generation}\nGenerated in {delta}\nPromptTokens: {prompt_tokens} Output Tokens: {answer_tokens} Total Tokens: {total_tokens}\n\n---\n\n"""
-     writehistory(logger)
-     convHistory = convHistory + prompt + "\n" + generation + "\n"
-     print(convHistory)
-     return generation, delta, prompt_tokens, answer_tokens, total_tokens
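-     # combine() is a generator, so gr.Button.click streams each yield into the UI
-     # and the final return value is discarded. A hypothetical standalone use:
-     #   for text, dt, ptk, atk, ttk in combine("", "Hello there", 0.42, 256, 0.8, 1.2):
-     #       print(text)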
-
-
- # MAIN GRADIO INTERFACE
- with gr.Blocks(theme='Medguy/base2') as demo:  # alternatives: gr.themes.Glass(), 'remilia/Ghostly'
-     # TITLE SECTION
-     with gr.Row(variant='compact'):
-         with gr.Column(scale=3):
-             gr.Image(value='https://github.com/fabiomatricardi/GradioStudies/raw/main/20231205/logo-banner-StableZephyr.jpg',
-                      show_label=False,
-                      show_download_button=False, container=False)
-         with gr.Column(scale=10):
-             gr.HTML("<center>"
-                     + "<h3>Prompt Engineering Playground!</h3>"
-                     + "<h1>💎🦜 StableLM-Zephyr-3B - 4K context window</h1></center>")
-     with gr.Row():
-         with gr.Column(min_width=80):
-             gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)
-         with gr.Column(min_width=80):
-             prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)
-         with gr.Column(min_width=80):
-             outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)
-         with gr.Column(min_width=80):
-             totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)
-     # INTERACTIVE INFOGRAPHIC SECTION
-
-
-     # PLAYGROUND INTERFACE SECTION
-     with gr.Row():
-         with gr.Column(scale=1):
-             gr.Markdown(
-                 """
-                 ### Tuning Parameters""")
-             temp = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.01, value=0.42)
-             top_p = gr.Slider(label="Top_P", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
-             repPen = gr.Slider(label="Repetition Penalty", minimum=0.0, maximum=4.0, step=0.01, value=1.2)
-             max_len = gr.Slider(label="Maximum output length", minimum=10, maximum=(contextlength-500), step=2, value=900)
-             gr.Markdown(
-                 """
-                 Fill in the System Prompt and User Prompt,
-                 then click the Button below.
-                 """)
-             btn = gr.Button(value="💎🦜 Generate", variant='primary')
-             gr.Markdown(
-                 f"""
-                 - **Prompt Template**: StableLM-Zephyr 💎🦜
-                 - **Repetition Penalty**: {repetitionpenalty}
-                 - **Context Length**: {contextlength} tokens
-                 - **LLM Engine**: llama-cpp
-                 - **Model**: 💎🦜 StableLM-Zephyr-3B
-                 - **Log File**: {logfile}
-                 """)
-
-
-         with gr.Column(scale=4):
-             txt = gr.Textbox(label="System Prompt", value="", placeholder="This model does not have any system prompt...", lines=1, interactive=False)
-             txt_2 = gr.Textbox(label="User Prompt", lines=6, show_copy_button=True)
-             txt_3 = gr.Textbox(value="", label="Output", lines=12, show_copy_button=True)
-             # inputs map positionally onto combine(a, b, c, d, e, f)
-             btn.click(combine, inputs=[txt, txt_2, temp, max_len, top_p, repPen], outputs=[txt_3, gentime, prompttokens, outputokens, totaltokens])
-
-
- if __name__ == "__main__":
-     demo.launch(inbrowser=True)