Spaces: Runtime error
quantization_config
app.py CHANGED
@@ -1,9 +1,18 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
 
-tokenizer = AutoTokenizer.from_pretrained("TheBloke/SOLAR-10.7B-Instruct-v1.0-uncensored-GPTQ")
+tokenizer = AutoTokenizer.from_pretrained("TheBloke/SOLAR-10.7B-Instruct-v1.0-uncensored-GPTQ", )
 model = AutoModelForCausalLM.from_pretrained("TheBloke/SOLAR-10.7B-Instruct-v1.0-uncensored-GPTQ")
 
+# Add the quantization config with disable_exllama=True
+quantization_config = {
+    'disable_exllama': True,
+}
+
+model = torch.quantization.quantize_dynamic(
+    model, quantization_config=quantization_config,
+)
+
 def generate_response(prompt):
     conversation = [{'role': 'user', 'content': prompt}]
     prompt = tokenizer.apply_chat_template(conversation, tokenizer=False, add_generation_prompt=True)
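As committed, the added block is the likely source of the Space's runtime error: torch is never imported, torch.quantization.quantize_dynamic has no quantization_config parameter (its signature is quantize_dynamic(model, qconfig_spec=None, dtype=...)), and the new AutoConfig import is unused. For a checkpoint that is already GPTQ-quantized, the documented way to disable the ExLlama kernels in transformers is to pass a GPTQConfig with disable_exllama=True (spelled use_exllama=False in more recent releases) to from_pretrained. The sketch below rewrites app.py along those lines; everything after apply_chat_template is not shown in this hunk and is filled in only as a plausible assumption (a greedy generate call plus a minimal Gradio Interface), and it assumes the intended keyword was tokenize=False rather than tokenizer=False.

# Sketch only, not the committed code: disable ExLlama through transformers'
# GPTQConfig instead of torch.quantization.quantize_dynamic.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig

MODEL_ID = "TheBloke/SOLAR-10.7B-Instruct-v1.0-uncensored-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# The checkpoint is already GPTQ-quantized; this config only tells the loader
# not to use the ExLlama kernels (newer transformers: use_exllama=False).
quantization_config = GPTQConfig(bits=4, disable_exllama=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",  # assumes a GPU Space with accelerate installed
    quantization_config=quantization_config,
)

def generate_response(prompt):
    conversation = [{"role": "user", "content": prompt}]
    # tokenize=False (not tokenizer=False) so a prompt string comes back
    text = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256)
    # Return only the newly generated tokens, not the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")
demo.launch()

Note that disabling ExLlama only changes which inference kernels are used; the weights remain 4-bit GPTQ either way.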