Christoph Holthaus committed
Commit a2232d8
1 Parent(s): 493f720
improve
app.py CHANGED
@@ -5,10 +5,13 @@ from time import time
 import gradio as gr
 import psutil
 
+# load like this - use the variable everywhere
+model_path = os.getenv("MODEL_PATH")
+# show a warning when it is empty, and a brief description of how to set it
+
 # Initing things
-print("debug: init model")
+print(f"debug: init model: {model_path}")
 #llm = Llama(model_path="./model.bin") # LLaMa model
-llama_model_name = "TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF"
 print("! INITING DONE !")
 
 # Preparing things to work
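This hunk leaves a TODO to warn when MODEL_PATH is empty and briefly explain how to set it. A minimal sketch of that check, assuming a printed warning in the Space log is enough; the message wording and the example path are illustrative:

    import os

    model_path = os.getenv("MODEL_PATH")
    if not model_path:
        # warn and briefly explain how to set the variable (wording is an assumption)
        print(
            "WARNING: MODEL_PATH is not set.\n"
            "Point it at the model file to load before starting the app, e.g.\n"
            "  export MODEL_PATH=./model.bin"
        )

Raising an exception here would fail faster, but a printed warning matches the debug-print style already used in this file.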
@@ -45,13 +48,14 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 # download model here
 # check local storage: if the model is not there, download it; else use the existing copy.
+# check gradio - how does it download? is there a function we can use?
 
 if torch.cuda.is_available():
     model_id = "mistralai/Mistral-7B-Instruct-v0.1"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-
+# we need to make sure we only run one thread, or we will probably run out of RAM
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
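Two TODOs in this hunk are worth sketching. On "is there a function we can use?": Gradio itself is not a downloader, but hf_hub_download from huggingface_hub caches files locally and returns the cached path on later runs, which also covers the "check local storage" idea. The repo id below is the one removed from this file earlier in the commit; the GGUF filename is an assumption:

    from huggingface_hub import hf_hub_download

    # first call downloads; later calls return the cached local copy
    model_path = hf_hub_download(
        repo_id="TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF",
        filename="dolphin-2.2.1-ashhlimarp-mistral-7b.Q4_K_M.gguf",  # assumed filename
    )

On running only one generation at a time: a plain lock around generate is a simple way to serialize requests. A sketch, with _generate_inner standing in as a hypothetical name for the existing body of generate:

    import threading

    _generate_lock = threading.Lock()  # only one generation at a time

    def generate(message, chat_history, *args, **kwargs):
        # block until the previous request finishes, so only one copy of the
        # model's working memory is live at any moment
        with _generate_lock:
            yield from _generate_inner(message, chat_history, *args, **kwargs)

Gradio's request queue can also cap concurrency, but the relevant parameter has changed name across versions, so the lock avoids pinning the Gradio version.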
@@ -133,6 +137,7 @@ chat_interface = gr.ChatInterface(
         ),
     ],
     stop_btn=None,
+    # add more eval examples, e.g. a long list taken from teknium and others; maybe group them by type
     examples=[
         ["Hello there! How are you doing?"],
         ["Can you explain briefly to me what is the Python programming language?"],
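For the "group them by type" TODO: gr.ChatInterface takes a flat examples list, so grouping is just a matter of ordering the entries and commenting the sections. A sketch with placeholder prompts, not taken from teknium's or any other eval set:

    examples = [
        # reasoning
        ["If I have 3 apples and eat one, how many do I have left?"],
        # coding
        ["Write a Python function that reverses a string."],
        # general knowledge
        ["Can you explain briefly to me what is the Python programming language?"],
    ]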
@@ -149,6 +154,7 @@ with gr.Blocks(css="style.css") as demo:
         value="Duplicate Space for private use",
         elem_id="duplicate-button",
         visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
+        # add
     )
     chat_interface.render()
 