schroneko committed on
Commit
691eb11
·
verified ·
1 Parent(s): 3c1404f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -5
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
  import gradio as gr
5
  import spaces
6
 
@@ -8,12 +8,10 @@ huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
8
  if not huggingface_token:
9
  raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
10
 
11
- model_id = "meta-llama/Llama-Guard-3-8B-INT8"
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  dtype = torch.bfloat16
14
 
15
- quantization_config = BitsAndBytesConfig(load_in_8bit=True)
16
-
17
  def parse_llama_guard_output(result):
18
  # "<END CONVERSATION>" 以降の部分を抽出
19
  safety_assessment = result.split("<END CONVERSATION>")[-1].strip()
@@ -43,7 +41,6 @@ def moderate(user_input, assistant_response):
43
  model_id,
44
  torch_dtype=dtype,
45
  device_map="auto",
46
- quantization_config=quantization_config,
47
  token=huggingface_token,
48
  low_cpu_mem_usage=True
49
  )
 
1
  import os
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import gradio as gr
5
  import spaces
6
 
 
8
  if not huggingface_token:
9
  raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
10
 
11
+ model_id = "meta-llama/Llama-Guard-3-1B"
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  dtype = torch.bfloat16
14
 
 
 
15
  def parse_llama_guard_output(result):
16
  # "<END CONVERSATION>" 以降の部分を抽出
17
  safety_assessment = result.split("<END CONVERSATION>")[-1].strip()
 
41
  model_id,
42
  torch_dtype=dtype,
43
  device_map="auto",
 
44
  token=huggingface_token,
45
  low_cpu_mem_usage=True
46
  )