# NOTE: the following is web-page residue captured together with this file
# (page status, file size, revision hashes); it is not part of the program.
# The page's line-number gutter (1-63) has been omitted.
# Spaces: Runtime error
# File size: 2,113 Bytes
# 6bc94ac 436ce71 6bc94ac 436ce71
import re
import spacy
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel
import streamlit as st
# Third-person pronoun patterns. Each matches whole words only (``\b`` on both
# sides) and ignores case. Subject/object/reflexive forms are kept separate
# from possessive forms so they can be replaced with different first-person
# words.
he_regex = re.compile(r'\b(he|him|himself)\b', re.I)    # masculine, non-possessive
his_regex = re.compile(r'\b(his)\b', re.I)              # masculine, possessive
she_regex = re.compile(r'\b(she|herself)\b', re.I)      # feminine, non-possessive
her_regex = re.compile(r'\b(her)\b', re.I)              # feminine: "her"
def hide_footer():
    """Inject a CSS rule that makes the page's ``footer`` element invisible.

    The rule is emitted through ``st.markdown`` with ``unsafe_allow_html=True``
    so the ``<style>`` tag is rendered as HTML rather than escaped.
    """
    # Kept at column 0 on purpose: the literal is sent to the page verbatim.
    footer_css = """
<style>
footer {visibility: hidden;}
</style>
"""
    st.markdown(footer_css, unsafe_allow_html=True)
@st.cache_resource
def get_seq2seq_model(model_id):
    """Return the pretrained sequence-to-sequence model for ``model_id``.

    The result is wrapped in ``st.cache_resource``; ``model_id`` is passed
    unchanged to ``AutoModelForSeq2SeqLM.from_pretrained``.
    """
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    return seq2seq_model
@st.cache_resource
def get_causal_model(model_id):
    """Return the pretrained causal language model for ``model_id``.

    The result is wrapped in ``st.cache_resource``; ``model_id`` is passed
    unchanged to ``AutoModelForCausalLM.from_pretrained``.
    """
    # NOTE(review): trust_remote_code=True lets the checkpoint's own Python
    # code run locally -- only call this with model ids you trust.
    causal_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
    )
    return causal_model
@st.cache_resource
def get_auto_model(model_id):
    """Return the pretrained base model for ``model_id``.

    The result is wrapped in ``st.cache_resource``; ``model_id`` is passed
    unchanged to ``AutoModel.from_pretrained``.
    """
    base_model = AutoModel.from_pretrained(model_id)
    return base_model
@st.cache_resource
def get_tokenizer(model_id):
    """Return the pretrained tokenizer for ``model_id``.

    The result is wrapped in ``st.cache_resource``; ``model_id`` is passed
    unchanged to ``AutoTokenizer.from_pretrained``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return tokenizer
@st.cache_data
def get_celeb_data(fpath):
    """Parse the JSON file at ``fpath`` and return the decoded object.

    The result is wrapped in ``st.cache_data``.

    Args:
        fpath: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which would garble non-ASCII text on some systems.
    with open(fpath, encoding="utf-8") as json_file:
        return json.load(json_file)
@st.cache_resource
def _load_spacy_model(model_id):
    """Load the spaCy pipeline ``model_id``; cached so it is loaded once per id."""
    return spacy.load(model_id)


def _name_patterns(name):
    """Return ``(possessive, plain)`` compiled patterns for the literal ``name``."""
    # Escape the name so characters such as "." or "(" are matched literally.
    literal = re.escape(name)
    # Lookarounds instead of ``\b``: a trailing ``\b`` after the non-word
    # apostrophe only matches when a word character follows, so "James’ book"
    # was never recognised. ``’s?`` accepts both "Name’s" and "Name’".
    possessive = re.compile(rf'(?<!\w){literal}’s?(?!\w)')
    plain = re.compile(rf'(?<!\w){literal}(?!\w)')
    return possessive, plain


@st.cache_resource
def preprocess_text(name, gender, text, model_id):
    """Rewrite third-person ``text`` about ``name`` into first person and split it.

    Gendered pronouns are replaced first (using the module-level pronoun
    patterns), then mentions of the full name and of its last word:
    possessive forms ("Name’s" / "Name’") become "my", bare mentions become
    "I". The rewritten text is then segmented into sentences with spaCy.

    Args:
        name: Full name of the person; its last whitespace-separated word is
            also treated as a name. A blank name skips name replacement.
        gender: "M" or "F" selects which pronoun set is rewritten; any other
            value leaves pronouns untouched.
        text: The text to rewrite.
        model_id: Name or path of the spaCy pipeline passed to ``spacy.load``.

    Returns:
        A ``(spacy_model, texts)`` tuple: the loaded spaCy pipeline and the
        list of stripped sentence strings.
    """
    if gender == "M":
        text = he_regex.sub("I", text)
        text = his_regex.sub("my", text)
    elif gender == "F":
        text = she_regex.sub("I", text)
        text = her_regex.sub("my", text)

    full_name = name.strip()
    # Guard against a blank name: an empty pattern would match at every word
    # boundary and scatter "I"/"my" throughout the text.
    if full_name:
        last_name = full_name.split()[-1]
        full_possessive, full_plain = _name_patterns(full_name)
        last_possessive, last_plain = _name_patterns(last_name)
        # Order matters: possessives before bare names, and the full name
        # before the last name, so longer matches are consumed first.
        replacements = (
            (full_possessive, "my"),
            (last_possessive, "my"),
            (full_plain, "I"),
            (last_plain, "I"),
        )
        for pattern, pronoun in replacements:
            text = pattern.sub(pronoun, text)

    # Load the pipeline through the per-model cache so a new text does not
    # trigger another (expensive) ``spacy.load``.
    spacy_model = _load_spacy_model(model_id)
    texts = [sent.text.strip() for sent in spacy_model(text).sents]
    return spacy_model, texts