kruntuid committed
Commit d22ab1f
1 Parent(s): 4d415e1

ysda next commit

Files changed (3)
  1. GD_download.py +31 -0
  2. app.py +91 -2
  3. requirements.txt +3 -0
GD_download.py ADDED
@@ -0,0 +1,31 @@
+# taken from this StackOverflow answer: https://stackoverflow.com/a/39225039
+import requests
+
+def download_file_from_google_drive(id, destination):
+    URL = "https://docs.google.com/uc?export=download"
+
+    session = requests.Session()
+
+    response = session.get(URL, params={'id': id}, stream=True)
+    token = get_confirm_token(response)
+
+    if token:
+        params = {'id': id, 'confirm': token}
+        response = session.get(URL, params=params, stream=True)
+
+    save_response_content(response, destination)
+
+def get_confirm_token(response):
+    for key, value in response.cookies.items():
+        if key.startswith('download_warning'):
+            return value
+
+    return None
+
+def save_response_content(response, destination):
+    CHUNK_SIZE = 32768
+
+    with open(destination, "wb") as f:
+        for chunk in response.iter_content(CHUNK_SIZE):
+            if chunk:  # filter out keep-alive new chunks
+                f.write(chunk)
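For orientation, this helper is what app.py calls to pull the two model checkpoints from Google Drive. A minimal usage sketch, with a placeholder file ID and destination (the real checkpoint IDs appear in app.py below):

from GD_download import download_file_from_google_drive

# "YOUR_FILE_ID" is hypothetical; use the ID from a Drive share link.
download_file_from_google_drive("YOUR_FILE_ID", "checkpoint.model")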
app.py CHANGED
@@ -1,4 +1,93 @@
 import streamlit as st
+from pathlib import Path
+import torch
+from transformers import BertTokenizer
+
 
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)
+@st.cache
+def get_tokenizer():
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    return tokenizer
+
+@st.cache
+def load_model_bert_mlm_positive():
+    f_checkpoint = Path("bert_mlm_positive.model")
+
+    if not f_checkpoint.exists():
+        with st.spinner("Downloading bert_mlm_positive... this may take a while! \n Don't stop it!"):
+            from GD_download import download_file_from_google_drive
+            cloud_model_location = "12Gvgv6zaOLJ8oyYXVB5_GYNEfvsjudG_"
+            download_file_from_google_drive(cloud_model_location, f_checkpoint)
+
+    model = torch.load(f_checkpoint, map_location=torch.device('cpu'))
+    model.eval()
+    return model
+
+@st.cache
+def load_model_model_seq_classify():
+    f_checkpoint = Path("model_seq_classify.model")
+
+    if not f_checkpoint.exists():
+        with st.spinner("Downloading model_seq_classify... this may take a while! \n Don't stop it!"):
+            from GD_download import download_file_from_google_drive
+            cloud_model_location = "13DwlCIM6aYc4WeOCIRqdGy-U0LGc8f0B"
+            download_file_from_google_drive(cloud_model_location, f_checkpoint)
+
+    model = torch.load(f_checkpoint, map_location=torch.device('cpu'))
+    model.eval()
+    return model
+
+
+def get_replacements_beamsearch(tokenizer, bert_mlm_positive, seq_classify_model, sentence: str, num_candidates=3):
+    sentence_ix = tokenizer(sentence, return_tensors='pt')
+
+    tokens = [tokenizer.decode([t]) for t in sentence_ix['input_ids'].cpu().numpy()[0]]
+
+    length = len(sentence_ix['input_ids'][0])
+
+    current = [(tokens, 0)]
+    for ix in range(1, length - 1):
+
+        new_current = []
+        for item in current:
+            sent = " ".join(item[0][1:-1])
+            # keep the unmodified sequence as a candidate too
+            new_current.append(item)
+
+            sent_ix = tokenizer(sent, return_tensors='pt')
+            logits_positive = bert_mlm_positive(**sent_ix).logits
+            probs_positive = logits_positive.softmax(dim=-1)[0, ix]
+            indices = torch.argsort(probs_positive, descending=True)
+
+            for cand_ix in range(num_candidates):
+                token_id = indices[cand_ix]
+                new_seq = item[0].copy()
+                new_seq[ix] = tokenizer.decode([token_id])
+
+                logits = seq_classify_model(**tokenizer(" ".join(new_seq[1:-1]), return_tensors='pt')).logits
+                prob = logits.softmax(dim=-1)[0][1]
+
+                new_current.append((new_seq, prob))
+
+        current = sorted(new_current, key=lambda x: -x[1])[:num_candidates]
+
+    return [" ".join(item[0][1:-1]) for item in current]
+
+
+
+negative_phrase = st.text_input("Input negative phrase")
+num_candidates = st.slider("Number of candidates", min_value=1, max_value=5)
+
+
+if negative_phrase:
+    bert_mlm_positive = load_model_bert_mlm_positive()
+    model_seq_classify = load_model_model_seq_classify()
+
+    ret = get_replacements_beamsearch(get_tokenizer(), bert_mlm_positive,
+                                      model_seq_classify, negative_phrase, num_candidates=num_candidates)
+
+    st.caption("Output positive phrases:")
+    for phrase in ret:
+        st.caption(phrase)
+
+
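To sanity-check the new beam-search helper outside the Streamlit UI, a rough sketch along these lines should work once both checkpoints above are present in the working directory (the sample phrase is illustrative; importing app also executes its top-level Streamlit calls, which is harmless in a plain Python session):

import torch
from transformers import BertTokenizer
from app import get_replacements_beamsearch

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Load the two fine-tuned models the same way app.py does.
bert_mlm_positive = torch.load("bert_mlm_positive.model", map_location=torch.device('cpu')).eval()
seq_classify = torch.load("model_seq_classify.model", map_location=torch.device('cpu')).eval()

# Ask for the three most "positive" rewrites of a negative phrase.
print(get_replacements_beamsearch(tokenizer, bert_mlm_positive, seq_classify,
                                  "the food was terrible", num_candidates=3))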
 
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ streamlit==1.2.0
+ torch==1.10.0
+ transformers==4.11.3
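With these pins, the usual local workflow applies: pip install -r requirements.txt, then streamlit run app.py.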