Update app.py
app.py CHANGED
@@ -10,11 +10,12 @@ from threading import Thread
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = "CohereForAI/aya-23-8B"
 MODEL_ID2 = "CohereForAI/aya-23-35B"
-
+MODELS = os.environ.get("MODELS")
+MODEL_NAME = MODELS.split("/")[-1]
 
 TITLE = "<h1><center>Aya-23-Chatbox</center></h1>"
 
-DESCRIPTION = f'<h3><center>MODEL: <a href="https://hf.co/{
+DESCRIPTION = f'<h3><center>MODEL: <a href="https://hf.co/{MODELS}">{MODEL_NAME}</a></center></h3>'
 
 CSS = """
 .duplicate-button {
@@ -49,13 +50,13 @@ if USE_FLASH_ATTENTION:
     attn_implementation="flash_attention_2"
 
 model = AutoModelForCausalLM.from_pretrained(
-
+    MODELS,
     quantization_config=quantization_config,
     attn_implementation=attn_implementation,
     torch_dtype=torch.bfloat16,
     device_map="auto",
 )
-tokenizer = AutoTokenizer.from_pretrained(
+tokenizer = AutoTokenizer.from_pretrained(MODELS)
 
 @spaces.GPU
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int):
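
For reference, here is a minimal standalone sketch of the loading pattern this commit introduces: the repository id now comes from a MODELS environment variable instead of a hardcoded MODEL_ID, and both the model and tokenizer are loaded from it. The BitsAndBytesConfig, the default repo id, and the flash-attention fallback below are assumptions for illustration only; the diff merely shows quantization_config and attn_implementation being passed through.

# Sketch of the model-loading pattern after this change (not the full app.py).
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Repo id comes from the environment instead of a hardcoded MODEL_ID.
# The default value here is an assumption; the Space sets MODELS in its settings.
MODELS = os.environ.get("MODELS", "CohereForAI/aya-23-8B")
MODEL_NAME = MODELS.split("/")[-1]  # short name used in the page description

# Assumed 4-bit quantization config; the diff only shows it being passed through.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Assumed fallback: use flash-attn only when explicitly enabled.
attn_implementation = (
    "flash_attention_2" if os.environ.get("USE_FLASH_ATTENTION") else "eager"
)

model = AutoModelForCausalLM.from_pretrained(
    MODELS,
    quantization_config=quantization_config,
    attn_implementation=attn_implementation,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODELS)

print(f"Loaded {MODEL_NAME} from {MODELS}")

With this setup, switching the Space between the 8B and 35B checkpoints only requires changing the MODELS variable (e.g. MODELS=CohereForAI/aya-23-35B) rather than editing app.py.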