Spaces:

liuhaozhe6788
/

CelebChat

Running

App Files Files Community

CelebChat / utils.py

lhzstar

new commits

3e6feb3 9 months ago

raw

history blame

No virus

2.11 kB

	import re
	import spacy
	import json
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel
	import streamlit as st

	# Whole-word, case-insensitive patterns for third-person pronouns.
	# The alternatives are separated by a plain "|": an escaped "\|" matches a
	# literal pipe character, which would turn the whole group into the literal
	# text "he|him|himself" and never match an actual pronoun.
	he_regex = re.compile(r'\b(he|him|himself)\b', flags=re.IGNORECASE)
	his_regex = re.compile(r'\b(his)\b', flags=re.IGNORECASE)
	she_regex = re.compile(r'\b(she|herself)\b', flags=re.IGNORECASE)
	her_regex = re.compile(r'\b(her)\b', flags=re.IGNORECASE)


	def hide_footer():
	    """Inject a CSS rule into the page that makes the ``footer`` element invisible."""
	    footer_css = """
	<style>
	footer {visibility: hidden;}
	</style>
	"""
	    # unsafe_allow_html is needed so the <style> tag is emitted as markup.
	    st.markdown(footer_css, unsafe_allow_html=True)

	@st.cache_resource
	def get_seq2seq_model(model_id):
	    """Return the sequence-to-sequence model loaded from *model_id*.

	    The load goes through ``AutoModelForSeq2SeqLM.from_pretrained`` and the
	    function is wrapped in ``st.cache_resource``.
	    """
	    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
	    return seq2seq_model

	@st.cache_resource
	def get_causal_model(model_id):
	    """Return the causal language model loaded from *model_id*.

	    The load goes through ``AutoModelForCausalLM.from_pretrained`` and the
	    function is wrapped in ``st.cache_resource``.
	    """
	    # NOTE(review): trust_remote_code=True is passed through to
	    # from_pretrained; confirm that every model_id used here points at a
	    # repository whose code is trusted.
	    causal_model = AutoModelForCausalLM.from_pretrained(
	        model_id,
	        trust_remote_code=True,
	    )
	    return causal_model

	@st.cache_resource
	def get_auto_model(model_id):
	    """Return the model loaded from *model_id* via ``AutoModel.from_pretrained``.

	    The function is wrapped in ``st.cache_resource``.
	    """
	    base_model = AutoModel.from_pretrained(model_id)
	    return base_model

	@st.cache_resource
	def get_tokenizer(model_id):
	    """Return the tokenizer loaded from *model_id* via ``AutoTokenizer.from_pretrained``.

	    The function is wrapped in ``st.cache_resource``.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(model_id)
	    return tokenizer

	@st.cache_data
	def get_celeb_data(fpath):
	    """Load and return the parsed contents of the JSON file at *fpath*.

	    Args:
	        fpath: Path of the JSON file to read.

	    Returns:
	        Whatever ``json.load`` produces for the file's contents.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If the contents are not valid JSON.
	    """
	    # Decode explicitly as UTF-8 instead of relying on the platform's default
	    # encoding, so non-ASCII characters in the data load the same everywhere.
	    with open(fpath, encoding="utf-8") as json_file:
	        return json.load(json_file)

	def _name_patterns(literal_name):
	    """Build ``(possessive, plain)`` whole-word regexes for a literal name."""
	    # Escape the name so characters such as "." or "$" are matched literally
	    # instead of being interpreted as regex syntax.
	    escaped = re.escape(literal_name)
	    # A name ending in "s" may be written with a bare apostrophe or with
	    # apostrophe + "s"; any other name needs apostrophe + "s". Both the
	    # typographic (’) and the straight (') apostrophe are accepted.
	    suffix = "[’']s?" if literal_name.endswith("s") else "[’']s"
	    # Lookarounds are used instead of \b: a trailing \b right after an
	    # apostrophe only matches when a word character follows, so "Williams’ "
	    # would never be recognised as a possessive.
	    possessive = re.compile(rf"(?<!\w){escaped}{suffix}(?!\w)")
	    plain = re.compile(rf"(?<!\w){escaped}(?!\w)")
	    return possessive, plain


	@st.cache_resource
	def preprocess_text(name, gender, text, model_id):
	    """Rewrite third-person references to *name* as first person and split into sentences.

	    Args:
	        name: Full name of the person the text is about; its last
	            whitespace-separated token is treated as the last name.
	        gender: ``"M"`` or ``"F"`` selects which pronouns are rewritten; any
	            other value leaves pronouns untouched.
	        text: The text to rewrite.
	        model_id: Identifier passed to ``spacy.load``.

	    Returns:
	        A tuple ``(spacy_model, texts)`` where ``spacy_model`` is the loaded
	        spaCy pipeline and ``texts`` is the list of stripped sentence strings
	        it found in the rewritten text.
	    """
	    if gender == "M":
	        text = he_regex.sub("I", text)
	        text = his_regex.sub("my", text)
	    elif gender == "F":
	        text = she_regex.sub("I", text)
	        text = her_regex.sub("my", text)

	    full_name = name.strip()
	    # An empty name would produce a pattern that matches at every word
	    # boundary, so name substitution is skipped entirely in that case.
	    if full_name:
	        last_name = full_name.split()[-1]
	        full_possessive, full_plain = _name_patterns(full_name)
	        last_possessive, last_plain = _name_patterns(last_name)
	        # Possessives go first so "Smith’s" becomes "my" rather than "I’s",
	        # and the full name goes before the last name it contains.
	        substitutions = (
	            (full_possessive, "my"),
	            (last_possessive, "my"),
	            (full_plain, "I"),
	            (last_plain, "I"),
	        )
	        for pattern, replacement in substitutions:
	            text = pattern.sub(replacement, text)

	    spacy_model = spacy.load(model_id)
	    texts = [sentence.text.strip() for sentence in spacy_model(text).sents]
	    return spacy_model, texts