Spaces:

CAGmllab
/

tst

Runtime error

App Files Files Community

CAGmllab committed on Mar 14, 2023

Commit

30c6353

•

1 Parent(s): 682d7c8

Upload 7 files

Browse files

Files changed (7) hide show

app.py +90 -0
requirements.txt +4 -0
streamlit_examples.json +43 -0
styleformer/__init__.py +2 -0
styleformer/adequacy.py +34 -0
styleformer/demo.py +42 -0
styleformer/styleformer.py +163 -0

app.py ADDED Viewed

	@@ -0,0 +1,90 @@

+from styleformer import Styleformer
+import streamlit as st
+import numpy as np
+import json
+class Demo:
+    def __init__(self):
+        st.set_page_config(
+            page_title="Styleformer Demo",
+            initial_sidebar_state="expanded"
+            )
+        self.style_map = {
+            #key : (name , style_num)
+            'ctf': ('Casual to Formal', 0),
+            'ftc': ('Formal to Casual', 1),
+            'atp': ('Active to Passive', 2),
+            'pta': ('Passive to Active', 3)
+            }
+        self.inference_map = {
+            0: 'Regular model on CPU',
+            1: 'Regular model on GPU',
+            2: 'Quantized model on CPU'
+        }
+        with open("streamlit_examples.json") as f:
+            self.examples = json.load(f)
+    @st.cache(show_spinner=False, suppress_st_warning=True, allow_output_mutation=True)
+    def load_sf(self, style=0):
+        sf = Styleformer(style = style)
+        return sf
+    def main(self):
+        github_repo = 'https://github.com/PrithivirajDamodaran/Styleformer'
+        st.title("Styleformer")
+        st.write(f'GitHub Link - [{github_repo}]({github_repo})')
+        st.write('A Neural Language Style Transfer framework to transfer natural language text smoothly between fine-grained language styles like formal/casual, active/passive, and many more')
+        style_key = st.sidebar.selectbox(
+            label='Choose Style',
+            options=list(self.style_map.keys()),
+            format_func=lambda x:self.style_map[x][0]
+            )
+        exp = st.sidebar.beta_expander('Knobs', expanded=True)
+        with exp:
+            inference_on = exp.selectbox(
+                label='Inference on',
+                options=list(self.inference_map.keys()),
+                format_func=lambda x:self.inference_map[x]
+                )
+            quality_filter = exp.slider(
+                label='Quality filter',
+                min_value=0.5,
+                max_value=0.99,
+                value=0.95
+                )
+            max_candidates = exp.number_input(
+                label='Max candidates',
+                min_value=1,
+                max_value=20,
+                value=5
+                )
+        with st.spinner('Loading model..'):
+            sf = self.load_sf(self.style_map[style_key][1])
+        input_text = st.selectbox(
+            label="Choose an example",
+            options=self.examples[style_key]
+            )
+        input_text = st.text_input(
+            label="Input text",
+            value=input_text
+        )
+        if input_text.strip():
+            result = sf.transfer(input_text, inference_on=inference_on, quality_filter=quality_filter, max_candidates=max_candidates)
+            st.markdown(f'#### Output:')
+            st.write('')
+            if result:
+                st.success(result)
+            else:
+                st.info('No good quality transfers available !')
+        else:
+            st.warning("Please select/enter text to proceed")
+if __name__ == "__main__":
+    obj = Demo()
+    obj.main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+transformers
+sentencepiece
+python-Levenshtein
+fuzzywuzzy
+# The models are run through PyTorch (tensors are requested with return_tensors='pt').
+torch

streamlit_examples.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "ctf": [
+        "I am quitting my job",
+        "Jimmy is on crack and can't trust him",
+        "What do guys do to show that they like a gal?",
+        "i loooooooooooooooooooooooove going to the movies.",
+        "That movie was fucking awesome",
+        "My mom is doing fine",
+        "That was funny LOL",
+        "It's piece of cake, we can do it",
+        "btw - ur avatar looks familiar",
+        "who gives a crap?",
+        "Howdy Lucy! been ages since we last met.",
+        "Dude, this car's dope!",
+        "She's my bestie from college",
+        "I kinda have a feeling that he has a crush on you.",
+        "OMG! It's finger-lickin' good."
+    ],
+    "ftc": [
+        "That really is quite impressive.",
+        "Would you please allow me to make a suggestion?",
+        "Good morning! How are you?",
+        "I would like to apologise for any inconvenience caused."
+    ],
+    "atp": [
+        "India won ICC Cricket World Cup 2011",
+        "Daya opened the door.",
+        "The cat killed the mouse",
+        "He has not completed the work.",
+        "I have made some cakes.",
+        "They are eating apples.",
+        "The wedding planner is making all the reservations.",
+        "PM declared nation-wide lockdown"
+    ],
+    "pta": [
+        "The lion was killed by the hunter.",
+        "He was given a book for his birthday.",
+        "The house will be cleaned by me every Saturday.",
+        "The Grand Canyon is visited by thousands of tourists every year.",
+        "All the reservations are being made by the wedding planner.",
+        "Money was generously donated to the homeless shelter by him"
+    ]
+  }

styleformer/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from styleformer.styleformer import Styleformer
2	+ from styleformer.adequacy import Adequacy

styleformer/adequacy.py ADDED Viewed

	@@ -0,0 +1,34 @@

+class Adequacy():
+  def __init__(self, model_tag='prithivida/parrot_adequacy_model'):
+    from transformers import AutoModelForSequenceClassification, AutoTokenizer
+    self.adequacy_model = AutoModelForSequenceClassification.from_pretrained(model_tag)
+    self.tokenizer = AutoTokenizer.from_pretrained(model_tag)
+  def filter(self, input_phrase, para_phrases, adequacy_threshold, device="cpu"):
+      top_adequacy_phrases = []
+      for para_phrase in para_phrases:
+        x = self.tokenizer(input_phrase, para_phrase, return_tensors='pt', max_length=128, truncation=True)
+        self.adequacy_model = self.adequacy_model.to(device)
+        logits = self.adequacy_model(**x).logits
+        probs = logits.softmax(dim=1)
+        prob_label_is_true = probs[:,1]
+        adequacy_score = prob_label_is_true.item()
+        if adequacy_score >= adequacy_threshold:
+            top_adequacy_phrases.append(para_phrase)
+      return top_adequacy_phrases
+  def score(self, input_phrase, para_phrases, adequacy_threshold, device="cpu"):
+      adequacy_scores = {}
+      for para_phrase in para_phrases:
+        x = self.tokenizer(input_phrase, para_phrase, return_tensors='pt', max_length=128, truncation=True)
+        x = x.to(device)
+        self.adequacy_model = self.adequacy_model.to(device)
+        logits = self.adequacy_model(**x).logits
+        probs = logits.softmax(dim=1)
+        prob_label_is_true = probs[:,1]
+        adequacy_score = prob_label_is_true.item()
+        if adequacy_score >= adequacy_threshold:
+          adequacy_scores[para_phrase] = adequacy_score
+      return adequacy_scores

styleformer/demo.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from styleformer import Styleformer
+import warnings
+warnings.filterwarnings("ignore")
+import torch
+def set_seed(seed):
+  torch.manual_seed(seed)
+  if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed)
+set_seed(1234)
+source_sentences = [
+"I am quitting my job",
+"Jimmy is on crack and can't trust him",
+"What do guys do to show that they like a gal?",
+"i loooooooooooooooooooooooove going to the movies.",
+"That movie was fucking awesome",
+"My mom is doing fine",
+"That was funny LOL",
+"It's piece of cake, we can do it",
+"btw - ur avatar looks familiar",
+"who gives a crap?",
+"Howdy Lucy! been ages since we last met.",
+"Dude, this car's dope!",
+"She's my bestie from college",
+"I kinda have a feeling that he has a crush on you.",
+"OMG! It's finger-lickin' good.",
+]
+# style = [0=Casual to Formal, 1=Formal to Casual, 2=Active to Passive, 3=Passive to Active etc..]
+sf = Styleformer(style = 0)
+for source_sentence in source_sentences:
+    # inference_on = [0=Regular model On CPU, 1= Regular model On GPU, 2=Quantized model On CPU]
+    target_sentence = sf.transfer(source_sentence, inference_on=1, quality_filter=0.95, max_candidates=5)
+    print("[Informal] ", source_sentence)
+    if target_sentence is not None:
+        print("[Formal] ",target_sentence)
+    else:
+        print("No good quality transfers available !")
+    print("-" *100)

styleformer/styleformer.py ADDED Viewed

	@@ -0,0 +1,163 @@

+class Styleformer():
+  def __init__(
+      self,
+      style=0,
+      ctf_model_tag="prithivida/informal_to_formal_styletransfer",
+      ftc_model_tag="prithivida/formal_to_informal_styletransfer",
+      atp_model_tag="prithivida/active_to_passive_styletransfer",
+      pta_model_tag="prithivida/passive_to_active_styletransfer",
+      adequacy_model_tag="prithivida/parrot_adequacy_model",
+  ):
+    from transformers import AutoTokenizer
+    from transformers import AutoModelForSeq2SeqLM
+    from styleformer import Adequacy
+    self.style = style
+    self.adequacy = adequacy_model_tag and Adequacy(model_tag=adequacy_model_tag)
+    self.model_loaded = False
+    if self.style == 0:
+      self.ctf_tokenizer = AutoTokenizer.from_pretrained(ctf_model_tag, use_auth_token=False)
+      self.ctf_model = AutoModelForSeq2SeqLM.from_pretrained(ctf_model_tag, use_auth_token=False)
+      print("Casual to Formal model loaded...")
+      self.model_loaded = True
+    elif self.style == 1:
+      self.ftc_tokenizer = AutoTokenizer.from_pretrained(ftc_model_tag, use_auth_token=False)
+      self.ftc_model = AutoModelForSeq2SeqLM.from_pretrained(ftc_model_tag, use_auth_token=False)
+      print("Formal to Casual model loaded...")
+      self.model_loaded = True
+    elif self.style == 2:
+      self.atp_tokenizer = AutoTokenizer.from_pretrained(atp_model_tag, use_auth_token=False)
+      self.atp_model = AutoModelForSeq2SeqLM.from_pretrained(atp_model_tag, use_auth_token=False)
+      print("Active to Passive model loaded...")
+      self.model_loaded = True
+    elif self.style == 3:
+      self.pta_tokenizer = AutoTokenizer.from_pretrained(pta_model_tag, use_auth_token=False)
+      self.pta_model = AutoModelForSeq2SeqLM.from_pretrained(pta_model_tag, use_auth_token=False)
+      print("Passive to Active model loaded...")
+      self.model_loaded = True
+    else:
+      print("Only CTF, FTC, ATP and PTA are supported in the pre-release...stay tuned")
+  def transfer(self, input_sentence, inference_on=-1, quality_filter=0.95, max_candidates=5):
+      if self.model_loaded:
+        if inference_on == -1:
+          device = "cpu"
+        elif inference_on >= 0 and inference_on < 999:
+          device = "cuda:" + str(inference_on)
+        else:
+          device = "cpu"
+          print("Onnx + Quantisation is not supported in the pre-release...stay tuned.")
+        if self.style == 0:
+          output_sentence = self._casual_to_formal(input_sentence, device, quality_filter, max_candidates)
+          return output_sentence
+        elif self.style == 1:
+          output_sentence = self._formal_to_casual(input_sentence, device, quality_filter, max_candidates)
+          return output_sentence
+        elif self.style == 2:
+          output_sentence = self._active_to_passive(input_sentence, device)
+          return output_sentence
+        elif self.style == 3:
+          output_sentence = self._passive_to_active(input_sentence, device)
+          return output_sentence
+      else:
+        print("Models aren't loaded for this style, please use the right style during init")
+  def _formal_to_casual(self, input_sentence, device, quality_filter, max_candidates):
+      ftc_prefix = "transfer Formal to Casual: "
+      src_sentence = input_sentence
+      input_sentence = ftc_prefix + input_sentence
+      input_ids = self.ftc_tokenizer.encode(input_sentence, return_tensors='pt')
+      self.ftc_model = self.ftc_model.to(device)
+      input_ids = input_ids.to(device)
+      preds = self.ftc_model.generate(
+          input_ids,
+          do_sample=True,
+          max_length=32,
+          top_k=50,
+          top_p=0.95,
+          early_stopping=True,
+          num_return_sequences=max_candidates)
+      gen_sentences = set()
+      for pred in preds:
+        gen_sentences.add(self.ftc_tokenizer.decode(pred, skip_special_tokens=True).strip())
+      adequacy_scored_phrases = self.adequacy.score(src_sentence, list(gen_sentences), quality_filter, device)
+      ranked_sentences = sorted(adequacy_scored_phrases.items(), key = lambda x:x[1], reverse=True)
+      if len(ranked_sentences) > 0:
+        return ranked_sentences[0][0]
+      else:
+        return None
+  def _casual_to_formal(self, input_sentence, device, quality_filter, max_candidates):
+      ctf_prefix = "transfer Casual to Formal: "
+      src_sentence = input_sentence
+      input_sentence = ctf_prefix + input_sentence
+      input_ids = self.ctf_tokenizer.encode(input_sentence, return_tensors='pt')
+      self.ctf_model = self.ctf_model.to(device)
+      input_ids = input_ids.to(device)
+      preds = self.ctf_model.generate(
+          input_ids,
+          do_sample=True,
+          max_length=32,
+          top_k=50,
+          top_p=0.95,
+          early_stopping=True,
+          num_return_sequences=max_candidates)
+      gen_sentences = set()
+      for pred in preds:
+        gen_sentences.add(self.ctf_tokenizer.decode(pred, skip_special_tokens=True).strip())
+      adequacy_scored_phrases = self.adequacy.score(src_sentence, list(gen_sentences), quality_filter, device)
+      ranked_sentences = sorted(adequacy_scored_phrases.items(), key = lambda x:x[1], reverse=True)
+      if len(ranked_sentences) > 0:
+        return ranked_sentences[0][0]
+      else:
+        return None
+  def _active_to_passive(self, input_sentence, device):
+      atp_prefix = "transfer Active to Passive: "
+      src_sentence = input_sentence
+      input_sentence = atp_prefix + input_sentence
+      input_ids = self.atp_tokenizer.encode(input_sentence, return_tensors='pt')
+      self.atp_model = self.atp_model.to(device)
+      input_ids = input_ids.to(device)
+      preds = self.atp_model.generate(
+          input_ids,
+          do_sample=True,
+          max_length=32,
+          top_k=50,
+          top_p=0.95,
+          early_stopping=True,
+          num_return_sequences=1)
+      return self.atp_tokenizer.decode(preds[0], skip_special_tokens=True).strip()
+  def _passive_to_active(self, input_sentence, device):
+      pta_prefix = "transfer Passive to Active: "
+      src_sentence = input_sentence
+      input_sentence = pta_prefix + input_sentence
+      input_ids = self.pta_tokenizer.encode(input_sentence, return_tensors='pt')
+      self.pta_model = self.pta_model.to(device)
+      input_ids = input_ids.to(device)
+      preds = self.pta_model.generate(
+          input_ids,
+          do_sample=True,
+          max_length=32,
+          top_k=50,
+          top_p=0.95,
+          early_stopping=True,
+          num_return_sequences=1)
+      return self.pta_tokenizer.decode(preds[0], skip_special_tokens=True).strip()