# Spaces: Sleeping  (page-header residue from the web capture; not part of the program)
import streamlit as st
from nagisa_bert import NagisaBertTokenizer
from transformers import pipeline
def _cache_models(func):
    """Wrap *func* with Streamlit's resource cache so its result is reused.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached loader would rebuild the tokenizer and both pipelines each time.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(func)
    # Older Streamlit releases only provide the legacy ``st.cache`` API;
    # output mutation must be allowed because model objects are not hashable
    # value types.
    return st.cache(allow_output_mutation=True)(func)


@_cache_models
def load_tasks():
    """Build the tokenizer and the pipelines backing each demo task.

    Returns:
        A dict keyed by task name:
        ``"Tokenize"`` -> the ``NagisaBertTokenizer`` instance,
        ``"Fill-mask"`` -> a ``fill-mask`` pipeline,
        ``"Feature-extraction"`` -> a ``feature-extraction`` pipeline.
        Both pipelines share the same tokenizer object.
    """
    model_name = "taishi-i/nagisa_bert"
    tokenizer = NagisaBertTokenizer.from_pretrained(model_name)

    fill_mask = pipeline(
        "fill-mask",
        model=model_name,
        tokenizer=tokenizer,
    )
    feature_extract = pipeline(
        "feature-extraction",
        model=model_name,
        tokenizer=tokenizer,
    )

    return {
        "Tokenize": tokenizer,
        "Fill-mask": fill_mask,
        "Feature-extraction": feature_extract,
    }
task2samples = { | |
"Fill-mask": { | |
"label": "[MASK]ใๅซใใใญในใใๅ ฅๅใใฆใใ ใใใ", | |
"value": "nagisaใง[MASK]ใงใใใขใใซใงใ" | |
}, | |
"Feature-extraction": { | |
"label": "[CLS]ใใผใฏใณใฎใใฏใใซใๅๅพใใพใใใใฏใใซๅใใใใญในใใๅ ฅๅใใฆใใ ใใใ", | |
"value": "nagisaใงๅฉ็จใงใใใขใใซใงใ" | |
}, | |
"Tokenize": { | |
"label": "ใใผใฏใใคใบใใใใญในใใๅ ฅๅใใฆใใ ใใใ", | |
"value": "nagisaใงๅฉ็จใงใใใขใใซใงใ" | |
}, | |
} | |
def main():
    """Render the demo page: choose a task, enter text, display the result.

    The selected task decides both the text-area prompt/sample (looked up in
    ``task2samples``) and how the submitted text is processed:

    * ``Fill-mask``: runs the fill-mask pipeline and shows its output as
      JSON; text without a ``[MASK]`` token gets a hint instead.
    * ``Feature-extraction``: shows the vector at index ``[0][0]`` of the
      pipeline output (described by the task label as the [CLS] vector).
    * ``Tokenize``: shows the tokenizer's token list as JSON.
    """
    tasks = load_tasks()

    task_selection = st.selectbox(
        "Select a task (Fill-mask, Feature-extraction, Tokenize)",
        ("Fill-mask", "Feature-extraction", "Tokenize"),
    )
    sample = task2samples[task_selection]

    # The form key is "Fill-mask" for every task; it is only an identifier.
    with st.form("Fill-mask"):
        text = st.text_area(
            label=sample["label"],
            value=sample["value"],
            max_chars=512,
        )
        submitted = st.form_submit_button("Submit")

        # NOTE(review): results are rendered inside the form container;
        # the original nesting could not be recovered with certainty.
        if submitted:
            if task_selection == "Fill-mask":
                if "[MASK]" in text:
                    out = tasks[task_selection](text)
                    st.json(out)
                else:
                    # Message restored from mis-decoded (mojibake) text.
                    st.write("[MASK] を含むテキストを入力してください。")

            elif task_selection == "Feature-extraction":
                # First sequence, first token of the extracted features.
                out = tasks[task_selection](text)[0][0]
                st.code(out)

            elif task_selection == "Tokenize":
                out = tasks[task_selection].tokenize(text)
                st.json(out)
# Entry point: call main() only when this file is executed as the main script.
if __name__ == "__main__":
    main()