Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,6 @@ dtype = torch.bfloat16
|
|
9 |
|
10 |
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
11 |
|
12 |
-
@spaces.GPU
|
13 |
def load_model():
|
14 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
15 |
model = AutoModelForCausalLM.from_pretrained(
|
@@ -22,6 +21,7 @@ def load_model():
|
|
22 |
|
23 |
tokenizer, model = load_model()
|
24 |
|
|
|
25 |
def moderate(user_input, assistant_response):
|
26 |
chat = [
|
27 |
{"role": "user", "content": user_input},
|
@@ -32,11 +32,8 @@ def moderate(user_input, assistant_response):
|
|
32 |
prompt_len = input_ids.shape[-1]
|
33 |
return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
|
34 |
|
35 |
-
def gradio_moderate(user_input, assistant_response):
|
36 |
-
return moderate(user_input, assistant_response)
|
37 |
-
|
38 |
iface = gr.Interface(
|
39 |
-
fn=gradio_moderate,
|
40 |
inputs=[
|
41 |
gr.Textbox(lines=3, label="User Input"),
|
42 |
gr.Textbox(lines=3, label="Assistant Response")
|
|
|
9 |
|
10 |
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
11 |
|
|
|
12 |
def load_model():
|
13 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
14 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
21 |
|
22 |
tokenizer, model = load_model()
|
23 |
|
24 |
+
@spaces.GPU
|
25 |
def moderate(user_input, assistant_response):
|
26 |
chat = [
|
27 |
{"role": "user", "content": user_input},
|
|
|
32 |
prompt_len = input_ids.shape[-1]
|
33 |
return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
|
34 |
|
|
|
|
|
|
|
35 |
iface = gr.Interface(
|
36 |
+
fn=moderate,
|
37 |
inputs=[
|
38 |
gr.Textbox(lines=3, label="User Input"),
|
39 |
gr.Textbox(lines=3, label="Assistant Response")
|