pngwn HF staff committed on
Commit
eaf6d5b
1 Parent(s): d1a85cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -15
app.py CHANGED
@@ -2,17 +2,21 @@ import os
2
  import gradio as gr
3
  import copy
4
  from llama_cpp import Llama
5
- from huggingface_hub import hf_hub_download
6
 
7
 
8
- llm = Llama(
9
- model_path=hf_hub_download(
10
- repo_id=os.environ.get("REPO_ID", "microsoft/Phi-3-mini-4k-instruct-gguf"),
11
- filename=os.environ.get("MODEL_FILE", "Phi-3-mini-4k-instruct-q4.gguf"),
12
- ),
13
- n_ctx=2048,
14
- n_gpu_layers=-1, # change n_gpu_layers if you have more or less VRAM
15
- )
 
 
 
 
16
 
17
 
18
  def generate_text(
@@ -26,7 +30,13 @@ def generate_text(
26
  temp = ""
27
  input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
28
  for interaction in history:
29
- input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
 
 
 
 
 
 
30
 
31
  input_prompt = input_prompt + str(message) + " [/INST] "
32
 
@@ -58,11 +68,11 @@ demo = gr.ChatInterface(
58
  title="llama-cpp-python on GPU",
59
  description="Running LLM with https://github.com/abetlen/llama-cpp-python",
60
  examples=[
61
- ['How to setup a human base on Mars? Give short answer.'],
62
- ['Explain theory of relativity to me like I’m 8 years old.'],
63
- ['What is 9,000 * 9,000?'],
64
- ['Write a pun-filled happy birthday message to my friend Alex.'],
65
- ['Justify why a penguin might make a good king of the jungle.']
66
  ],
67
  cache_examples=False,
68
  retry_btn=None,
 
2
  import gradio as gr
3
  import copy
4
  from llama_cpp import Llama
5
+ from huggingface_hub import hf_hub_download
6
 
7
 
8
+ try:
9
+ llm = Llama(
10
+ model_path=hf_hub_download(
11
+ repo_id=os.environ.get("REPO_ID", "microsoft/Phi-3-mini-4k-instruct-gguf"),
12
+ filename=os.environ.get("MODEL_FILE", "Phi-3-mini-4k-instruct-q4.gguf"),
13
+ ),
14
+ n_ctx=2048,
15
+ n_gpu_layers=-1, # change n_gpu_layers if you have more or less VRAM
16
+ )
17
+
18
+ except Exception as e:
19
+ print(e)
20
 
21
 
22
  def generate_text(
 
30
  temp = ""
31
  input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
32
  for interaction in history:
33
+ input_prompt = (
34
+ input_prompt
35
+ + str(interaction[0])
36
+ + " [/INST] "
37
+ + str(interaction[1])
38
+ + " </s><s> [INST] "
39
+ )
40
 
41
  input_prompt = input_prompt + str(message) + " [/INST] "
42
 
 
68
  title="llama-cpp-python on GPU",
69
  description="Running LLM with https://github.com/abetlen/llama-cpp-python",
70
  examples=[
71
+ ["How to setup a human base on Mars? Give short answer."],
72
+ ["Explain theory of relativity to me like I’m 8 years old."],
73
+ ["What is 9,000 * 9,000?"],
74
+ ["Write a pun-filled happy birthday message to my friend Alex."],
75
+ ["Justify why a penguin might make a good king of the jungle."],
76
  ],
77
  cache_examples=False,
78
  retry_btn=None,