Spaces:

nakcnx
/

paotung-llama3

Sleeping

App Files Files Community

nakcnx committed on May 10, 2024

Commit

2bb2938

verified ·

1 Parent(s): 7b5a0ee

Create app.py

Browse files

Files changed (1) hide show

app.py +141 -0

app.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import gradio as gr
+from llama_cpp import Llama
+import datetime
+import os
+import datetime
+from huggingface_hub import hf_hub_download
+# Model settings; several of these values are also interpolated into the UI info panel.
+convHistory = ''  # transcript that combine() appends to on every call
+modelfile = hf_hub_download(
+        repo_id=os.environ.get("REPO_ID", "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"),  # overridable via the REPO_ID env var
+        filename=os.environ.get("MODEL_FILE", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"),  # overridable via the MODEL_FILE env var
+    )
+repetitionpenalty = 1.15  # only displayed in the info panel; combine() receives the slider value instead
+contextlength=4096  # passed to Llama as n_ctx and used to bound the max-output slider
+logfile = 'Meta-Llama-3-8B-Instruct_logs.txt'  # file that writehistory() appends to
+print("loading model...")
+stt = datetime.datetime.now()  # start time for the "Model loaded in ..." message
+# No GPU-offload argument is passed to Llama() below; llama-cpp-python exposes n_gpu_layers for that.
+llm = Llama(
+  model_path=modelfile,  # path returned by hf_hub_download above
+  n_ctx=contextlength,  # The max sequence length to use - note that longer sequence lengths require much more resources
+  #n_threads=2,            # The number of CPU threads to use, tailor to your system and the resulting performance
+)
+dt = datetime.datetime.now() - stt
+print(f"Model loaded in {dt}")
+def writehistory(text):
+    with open(logfile, 'a') as f:
+        f.write(text)
+        f.write('\n')
+    f.close()
+"""
+gr.themes.Base()
+gr.themes.Default()
+gr.themes.Glass()
+gr.themes.Monochrome()
+gr.themes.Soft()
+"""
+def combine(a, b, c, d,e,f):
+    global convHistory
+    import datetime
+    SYSTEM_PROMPT = f"""{a}
+    """
+    temperature = c
+    max_new_tokens = d
+    repeat_penalty = f
+    top_p = e
+    #prompt = f"<|user|>\n{b}<|endoftext|>\n<|assistant|>"
+    prompt = [
+        {"role": "system", "content": SYSTEM_PROMPT} ,
+        {"role": "user", "content": b},
+    ]
+    prompt = f"""{prompt}"""
+    start = datetime.datetime.now()
+    generation = ""
+    delta = ""
+    prompt_tokens = f"Prompt Tokens: {len(llm.tokenize(bytes(prompt,encoding='utf-8')))}"
+    generated_text = ""
+    answer_tokens = ''
+    total_tokens = ''
+    for character in llm(prompt,
+                max_tokens=max_new_tokens,
+                stop=["</s>"],
+                temperature = temperature,
+                repeat_penalty = repeat_penalty,
+                top_p = top_p,   # Example stop token - not necessarily correct for this specific model! Please check before using.
+                echo=False,
+                stream=True):
+        generation += character["choices"][0]["text"]
+        answer_tokens = f"Out Tkns: {len(llm.tokenize(bytes(generation,encoding='utf-8')))}"
+        total_tokens = f"Total Tkns: {len(llm.tokenize(bytes(prompt,encoding='utf-8'))) + len(llm.tokenize(bytes(generation,encoding='utf-8')))}"
+        delta = datetime.datetime.now() - start
+        yield generation, delta, prompt_tokens, answer_tokens, total_tokens
+    timestamp = datetime.datetime.now()
+    logger = f"""time: {timestamp}\n Temp: {temperature} - MaxNewTokens: {max_new_tokens} - RepPenalty: 1.5 \nPROMPT: \n{prompt}\nStableZephyr3B: {generation}\nGenerated in {delta}\nPromptTokens: {prompt_tokens}   Output Tokens: {answer_tokens}  Total Tokens: {total_tokens}\n\n---\n\n"""
+    writehistory(logger)
+    convHistory = convHistory + prompt + "\n" + generation + "\n"
+    print(convHistory)
+    return generation, delta, prompt_tokens, answer_tokens, total_tokens
+    #return generation, delta
+# Main Gradio interface: a title/status row, then a parameter column beside the prompt/output column.
+with gr.Blocks(theme='Medguy/base2') as demo:   # alternatives left by the author: gr.themes.Glass(), 'remilia/Ghostly'
+    # Title section: heading plus four read-only status boxes filled by combine().
+    with gr.Row(variant='compact'):
+            with gr.Column(scale=10):
+                gr.HTML("<center>"
+                + "<h2>🐶 Paotung Llama-3-8B</h2></center>")
+                with gr.Row():
+                        with gr.Column(min_width=80):
+                            gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)  # 2nd output of combine(): elapsed time
+                        with gr.Column(min_width=80):
+                            prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)  # 3rd output: prompt token count string
+                        with gr.Column(min_width=80):
+                            outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)  # 4th output: generated token count string
+                        with gr.Column(min_width=80):
+                            totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)  # 5th output: total token count string
+    # (No infographic section is defined in this file.)
+    # Playground section: sampling controls on the left, prompts and output on the right.
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown(
+            f"""
+            ### Tunning Parameters""")
+            temp = gr.Slider(label="Temperature",minimum=0.0, maximum=1.0, step=0.01, value=0.42)  # combine() parameter c
+            top_p = gr.Slider(label="Top_P",minimum=0.0, maximum=1.0, step=0.01, value=0.8)  # combine() parameter e
+            repPen = gr.Slider(label="Repetition Penalty",minimum=0.0, maximum=4.0, step=0.01, value=1.2)  # combine() parameter f
+            max_len = gr.Slider(label="Maximum output lenght", minimum=10,maximum=(contextlength-500),step=2, value=900)  # combine() parameter d; presumably the 500-token margin is reserved for the prompt
+            gr.Markdown(
+            """
+            Fill the System Prompt and User Prompt
+            And then click the Button below
+            """)
+            btn = gr.Button(value="💎🦜 Generate", variant='primary')
+            gr.Markdown(  # NOTE(review): the repetition penalty shown here is the module constant, not the slider value passed to combine()
+            f"""
+            - **Prompt Template**: Llama-3-8B
+            - **Repetition Penalty**: {repetitionpenalty}
+            - **Context Lenght**: {contextlength} tokens
+            - **LLM Engine**: llama-cpp
+            - **Model**: 💎🦜 Llama-3-8B
+            - **Log File**: {logfile}
+            """)
+        with gr.Column(scale=4):
+            txt = gr.Textbox(label="System Prompt", value = "", placeholder = "This models does not have any System prompt...",lines=1, interactive = True)  # combine() parameter a
+            txt_2 = gr.Textbox(label="User Prompt", lines=5, show_copy_button=True)  # combine() parameter b
+            txt_3 = gr.Textbox(value="", label="Output", lines = 10, show_copy_button=True)  # 1st output of combine(): generated text so far
+            btn.click(combine, inputs=[txt, txt_2,temp,max_len,top_p,repPen], outputs=[txt_3,gentime,prompttokens,outputokens,totaltokens])  # inputs map positionally to combine(a, b, c, d, e, f)
+if __name__ == "__main__":
+    demo.launch(inbrowser=True)  # start the app only when run as a script