Spaces:

t-bank-ai
/

caif

Runtime error

App Files Community

Балаганский Никита Николаевич committed on Jun 6, 2022

Commit

47f9ff2

•

1 Parent(s): d320fdd

add entropy_threshold

Browse files

Files changed (1) hide show

app.py +25 -8

app.py CHANGED Viewed

@@ -19,18 +19,24 @@ def main():
     cls_model_name = st.selectbox(
         'Выберите модель классификации',
         ('tinkoff-ai/response-quality-classifier-tiny', 'tinkoff-ai/response-quality-classifier-base',
-         'tinkoff-ai/response-quality-classifier-large')
     )
     lm_model_name = st.selectbox(
         'Выберите языковую модель',
         ('sberbank-ai/rugpt3small_based_on_gpt2',)
     )
     cls_model_config = transformers.AutoConfig.from_pretrained(cls_model_name)
-    label2id = cls_model_config.label2id
-    label_key = st.selectbox("Веберите нужный атрибут текста", label2id.keys())
-    target_label_id = label2id[label_key]
     prompt = st.text_input("Начало текста:", "Привет")
-    alpha = st.slider("Alpha:", min_value=-10, max_value=10, step=1)
     auth_token = os.environ.get('TOKEN') or True
     with st.spinner('Running inference...'):
         text = inference(lm_model_name=lm_model_name, cls_model_name=cls_model_name, prompt=prompt, alpha=alpha)
@@ -55,11 +61,22 @@ def load_sampler(cls_model_name, lm_tokenizer):
 @st.cache
 def inference(
-        lm_model_name: str, cls_model_name: str, prompt: str, fp16: bool = True, alpha: float = 5, target_label_id: int = 0
 ) -> str:
     generator = load_generator(lm_model_name=lm_model_name)
     lm_tokenizer = transformers.AutoTokenizer.from_pretrained(lm_model_name)
-    caif_sampler = load_sampler(cls_model_name=cls_model_name, lm_tokenizer=lm_tokenizer)
     generator.set_caif_sampler(caif_sampler)
     ordinary_sampler = TopKWithTemperatureSampler()
     kwargs = {
@@ -81,7 +98,7 @@ def inference(
             input_prompt=prompt,
             max_length=20,
             caif_period=1,
-            entropy=None,
             **kwargs
         )
         print(f"Output for prompt: {sequences}")

     cls_model_name = st.selectbox(
         'Выберите модель классификации',
         ('tinkoff-ai/response-quality-classifier-tiny', 'tinkoff-ai/response-quality-classifier-base',
+         'tinkoff-ai/response-quality-classifier-large', "SkolkovoInstitute/roberta_toxicity_classifier")
     )
     lm_model_name = st.selectbox(
         'Выберите языковую модель',
         ('sberbank-ai/rugpt3small_based_on_gpt2',)
     )
     cls_model_config = transformers.AutoConfig.from_pretrained(cls_model_name)
+    if cls_model_config.problem_type == "multi_label_classification":
+        label2id = cls_model_config.label2id
+        label_key = st.selectbox("Веберите нужный атрибут текста", label2id.keys())
+        target_label_id = label2id[label_key]
+    else:
+        label2id = cls_model_config.label2id
+        label_key = st.selectbox("Веберите нужный атрибут текста", list(label2id.keys())[-1])
+        target_label_id = 0
     prompt = st.text_input("Начало текста:", "Привет")
+    alpha = st.slider("Alpha:", min_value=-10, max_value=10, step=1, value=0)
+    entropy_threshold = st.slider("Entropy Threshold:", min_value=0., max_value=5., step=.1, value=0.)
     auth_token = os.environ.get('TOKEN') or True
     with st.spinner('Running inference...'):
         text = inference(lm_model_name=lm_model_name, cls_model_name=cls_model_name, prompt=prompt, alpha=alpha)
 @st.cache
 def inference(
+        lm_model_name: str,
+        cls_model_name: str,
+        prompt: str,
+        fp16: bool = True,
+        alpha: float = 5,
+        target_label_id: int = 0,
+        entropy_threshold: float = 0
 ) -> str:
     generator = load_generator(lm_model_name=lm_model_name)
     lm_tokenizer = transformers.AutoTokenizer.from_pretrained(lm_model_name)
+    if alpha != 0:
+        caif_sampler = load_sampler(cls_model_name=cls_model_name, lm_tokenizer=lm_tokenizer)
+    else:
+        caif_sampler = None
+    if entropy_threshold < 0.05:
+        entropy_threshold = None
     generator.set_caif_sampler(caif_sampler)
     ordinary_sampler = TopKWithTemperatureSampler()
     kwargs = {
             input_prompt=prompt,
             max_length=20,
             caif_period=1,
+            entropy=entropy_threshold,
             **kwargs
         )
         print(f"Output for prompt: {sequences}")