taishi-i's picture
Fix typo
4134ae7
raw
history blame
2.31 kB
import streamlit as st
from transformers import pipeline
from nagisa_bert import NagisaBertTokenizer
# NOTE(review): newer Streamlit releases deprecate st.cache in favour of
# st.cache_resource for objects such as models -- confirm against the pinned
# Streamlit version before changing the decorator.
@st.cache(allow_output_mutation=True)
def load_tasks():
    """Create the tokenizer and the two pipelines used by the demo.

    The tokenizer is loaded once and shared by both pipelines, which are
    built from the same checkpoint name.

    Returns:
        dict: task name -> callable object, with the keys ``"Tokenize"``
        (the ``NagisaBertTokenizer`` itself), ``"Fill-mask"`` and
        ``"Feature-extraction"`` (``transformers`` pipelines).
    """
    checkpoint = "taishi-i/nagisa_bert"
    nagisa_tokenizer = NagisaBertTokenizer.from_pretrained(checkpoint)

    # Both pipelines take exactly the same model/tokenizer arguments.
    shared_kwargs = {"model": checkpoint, "tokenizer": nagisa_tokenizer}
    mask_filler = pipeline("fill-mask", **shared_kwargs)
    feature_extractor = pipeline("feature-extraction", **shared_kwargs)

    return {
        "Tokenize": nagisa_tokenizer,
        "Fill-mask": mask_filler,
        "Feature-extraction": feature_extractor,
    }
task2samples = {
"Fill-mask": {
"label": "[MASK]ใ‚’ๅซใ‚€ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚",
"value": "nagisaใง[MASK]ใงใใ‚‹ใƒขใƒ‡ใƒซใงใ™"
},
"Feature-extraction": {
"label": "[CLS]ใƒˆใƒผใ‚ฏใƒณใฎใƒ™ใ‚ฏใƒˆใƒซใ‚’ๅ–ๅพ—ใ—ใพใ™ใ€‚ใƒ™ใ‚ฏใƒˆใƒซๅŒ–ใ™ใ‚‹ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚",
"value": "nagisaใงๅˆฉ็”จใงใใ‚‹ใƒขใƒ‡ใƒซใงใ™"
},
"Tokenize": {
"label": "ใƒˆใƒผใ‚ฏใƒŠใ‚คใ‚บใ™ใ‚‹ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚",
"value": "nagisaใงๅˆฉ็”จใงใใ‚‹ใƒขใƒ‡ใƒซใงใ™"
},
}
def main():
    """Render the demo page and run the selected task on submitted text.

    The page consists of a task selector, a form with a text area whose
    label and default text come from ``task2samples``, and the output of the
    chosen task once the form is submitted:

    * ``"Fill-mask"`` -- shows the pipeline result as JSON, or a hint when
      the text contains no ``[MASK]`` token.
    * ``"Feature-extraction"`` -- shows the first token's vector of the
      first sequence (described as the [CLS] vector by the form label).
    * ``"Tokenize"`` -- shows the tokenizer's token list as JSON.
    """
    tasks = load_tasks()

    task_selection = st.selectbox(
        "Select a task (Fill-mask, Feature-extraction, Tokenize)",
        ("Fill-mask", "Feature-extraction", "Tokenize"),
    )
    sample = task2samples[task_selection]

    # "Fill-mask" is only the form's key; the same form serves every task.
    with st.form("Fill-mask"):
        text = st.text_area(
            label=sample["label"],
            value=sample["value"],
            max_chars=512,
        )
        submitted = st.form_submit_button("Submit")

        # NOTE(review): the original nesting was not recoverable; results are
        # assumed to render inside the form container -- confirm.
        if submitted:
            if task_selection == "Fill-mask":
                if "[MASK]" in text:
                    out = tasks[task_selection](text)
                    st.json(out)
                else:
                    # Restored from mojibake:
                    # "Please enter text that contains [MASK]."
                    st.write("[MASK] を含むテキストを入力してください。")
            elif task_selection == "Feature-extraction":
                # [0][0]: first sequence, first token.
                out = tasks[task_selection](text)[0][0]
                st.code(out)
            elif task_selection == "Tokenize":
                out = tasks[task_selection].tokenize(text)
                st.json(out)
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()