File size: 2,113 Bytes
6bc94ac
436ce71
 
 
6bc94ac
 
 
 
 
 
 
 
 
 
 
 
 
 
436ce71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import re
import spacy
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel
import streamlit as st

# Whole-word, case-insensitive pronoun matchers used when rewriting
# third-person text.  Subject/object/reflexive forms and possessive forms are
# kept in separate patterns so each group can be given its own replacement.
he_regex = re.compile(r'\b(he|him|himself)\b', re.IGNORECASE)
his_regex = re.compile(r'\b(his)\b', re.IGNORECASE)
# "her" is deliberately not part of she_regex; it has its own pattern below.
she_regex = re.compile(r'\b(she|herself)\b', re.IGNORECASE)
her_regex = re.compile(r'\b(her)\b', re.IGNORECASE)


def hide_footer():
    """Render a CSS snippet that sets the page's ``footer`` element to hidden."""
    # The stylesheet is passed as raw HTML, hence unsafe_allow_html=True.
    st.markdown(
        """
            <style>
            footer {visibility: hidden;}
            </style>
            """,
        unsafe_allow_html=True,
    )

@st.cache_resource
def get_seq2seq_model(model_id):
    """Return the pretrained sequence-to-sequence model for ``model_id``.

    The result is memoised through ``st.cache_resource``.
    """
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    return seq2seq_model

@st.cache_resource
def get_causal_model(model_id):
    """Return the pretrained causal language model for ``model_id``.

    The result is memoised through ``st.cache_resource``.
    """
    # NOTE(review): trust_remote_code=True allows code shipped with the model
    # repository to be executed on load -- confirm that every model_id passed
    # here comes from a trusted source.
    causal_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
    )
    return causal_model

@st.cache_resource
def get_auto_model(model_id):
    """Return the pretrained ``AutoModel`` for ``model_id``.

    The result is memoised through ``st.cache_resource``.
    """
    auto_model = AutoModel.from_pretrained(model_id)
    return auto_model

@st.cache_resource
def get_tokenizer(model_id):
    """Return the pretrained tokenizer for ``model_id``.

    The result is memoised through ``st.cache_resource``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return tokenizer

@st.cache_data
def get_celeb_data(fpath):
    """Load and return the parsed JSON document stored at ``fpath``.

    The result is memoised through ``st.cache_data``.

    Args:
        fpath: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: without it, open() uses the platform's
    # locale encoding, which garbles or rejects non-ASCII text on some systems.
    with open(fpath, encoding="utf-8") as json_file:
        return json.load(json_file)

@st.cache_resource
def _load_spacy_model(model_id):
    """Load the spaCy pipeline ``model_id``, cached per ``model_id``."""
    return spacy.load(model_id)


def _name_patterns(name):
    """Compile ``(possessive, plain)`` whole-word regexes for ``name``."""
    # Escape the name so characters such as "." or "(" are matched literally.
    escaped = re.escape(name)
    # Accept both the typographic and the ASCII apostrophe.  Names ending in
    # "s" may be written with a bare apostrophe ("Adams’") or with "’s".
    suffix = "[’']s?" if name.endswith("s") else "[’']s"
    # Lookarounds are used instead of \b: \b after an apostrophe only matches
    # when a word character follows, so "Adams’ book" would never be found.
    possessive = re.compile(rf"(?<!\w){escaped}{suffix}(?!\w)")
    plain = re.compile(rf"(?<!\w){escaped}(?!\w)")
    return possessive, plain


@st.cache_resource
def preprocess_text(name, gender, text, model_id):
    """Rewrite third-person ``text`` about ``name`` into first person and split it.

    Pronouns are replaced according to ``gender`` using the module-level
    pronoun patterns, then possessive and plain mentions of the full name and
    of its last whitespace-separated token are replaced with "my" and "I".
    Name matching is case-sensitive.  The rewritten text is finally split
    into sentences with the spaCy pipeline ``model_id``.

    Args:
        name: Full name of the person the text is about.  If it is empty or
            only whitespace, no name substitution is performed.
        gender: "M" or "F" selects the pronoun set to rewrite; any other value
            leaves pronouns untouched.
        text: The text to rewrite.
        model_id: Name or path of the spaCy pipeline passed to ``spacy.load``.

    Returns:
        A tuple ``(spacy_model, texts)`` where ``texts`` is the list of
        stripped sentence strings.
    """
    # NOTE(review): the pronoun patterns map "him"/"himself"/"herself" to "I"
    # as well -- confirm this is intended rather than "me"/"myself".
    if gender == "M":
        text = he_regex.sub("I", text)
        text = his_regex.sub("my", text)
    elif gender == "F":
        text = she_regex.sub("I", text)
        text = her_regex.sub("my", text)

    full_name = name.strip()
    if full_name:
        # split() without arguments ignores repeated or trailing whitespace,
        # so the surname is never an empty string.
        last_name = full_name.split()[-1]
        full_possessive, full_plain = _name_patterns(full_name)
        last_possessive, last_plain = _name_patterns(last_name)
        # Possessives go first so "Smith’s" becomes "my" rather than "I’s";
        # the full name goes before the surname so it is consumed as a unit.
        substitutions = (
            (full_possessive, "my"),
            (last_possessive, "my"),
            (full_plain, "I"),
            (last_plain, "I"),
        )
        for pattern, replacement in substitutions:
            text = pattern.sub(replacement, text)

    # The pipeline is cached separately so that a new text does not trigger a
    # fresh spacy.load for an already-loaded model_id.
    spacy_model = _load_spacy_model(model_id)
    texts = [sentence.text.strip() for sentence in spacy_model(text).sents]
    return spacy_model, texts