# app.py — Domain / Usage tagger (author: Hong, commit b81c70b)
from platform import processor
import streamlit as st
from load_data import candidate_labels
import numpy as np
from load_data import *
import pickle
import torch
from BART_utils import get_taggs
from stqdm import stqdm
import pandas as pd
def transform_data(data, filetype=True):
    """Parse an uploaded file into a pandas DataFrame.

    Parameters
    ----------
    data : file-like
        The uploaded file object (e.g. the value returned by
        ``st.file_uploader``).
    filetype : bool, default True
        True -> parse as CSV; False -> parse as Excel (.xlsx).

    Returns
    -------
    pandas.DataFrame
    """
    # BUG FIX: the original read the module-global `uploaded_file`
    # instead of the `data` argument, so the parameter was silently
    # ignored and the function only worked by accident inside this script.
    if filetype:
        df = pd.read_csv(data)
    else:
        df = pd.read_excel(data)
    return df
def convert_df(df):
    """Serialize *df* to UTF-8 CSV bytes suitable for ``st.download_button``."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
# Hook stqdm's progress bar into pandas so `progress_apply` is available.
stqdm.pandas()

st.title("Domain and Usage tagger")
st.subheader("문장을 입력하면 주제 / 용도 태그를 생성합니다 (EN지원)")

# Pick the compute device and an emoji badge for the status line.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
processor = "🖥️" if device == "cpu" else "💽"
st.subheader("Running on {}".format(device + processor))

# Toggle between single-sentence mode and bulk file-upload mode.
bulk = st.checkbox("파일을 업로드하시겠어요?")
if not bulk:
    # ----- single-sentence mode -----
    user_input = st.text_area(
        "👇태그를 생성할 문장을 입력하세요 - 현재 영문만 지원됩니다.", """NLI-based Zero Shot Text Classification
Yin et al. proposed a method for using pre-trained NLI models as a ready-made zero-shot sequence classifiers. The method works by posing the sequence to be classified as the NLI premise and to construct a hypothesis from each candidate label. The probabilities for entailment and contradiction are then converted to label probabilities."""
    )
    thred = st.slider(
        "👇태그 생성 thredhold 설정. 결과가 나오지 않을경우, threshold를 0에 가깝게 낮추세요!",
        0.0,
        1.0,
        0.5,
        step=0.01,
    )
    if thred:
        st.write(thred, " 이상의 confidence level인 태그만 생성합니다.")
    maximum = st.number_input("👇최대 태그 갯수 설정", 0, 10, 5, step=1)
    st.write("최대 {} 개의 태그 생성".format(maximum))
    check_source = st.checkbox("🏷️용처 / 출처 태그 생성")
    submit = st.button("👈클릭해서 태그 생성")
    if submit:
        with st.spinner("⌛태그를 생성하는 중입니다..."):
            result = get_taggs(user_input, candidate_labels, thred)
            # Keep only the top-`maximum` (label, score) pairs.
            result = result[:maximum]
        st.subheader("🔍혹시 이런 주제의 문장인가요? : ")
        if len(result) == 0:
            st.write("😢저런..결과가 없습니다. Threshold를 낮춰보세요!")
        for i in result:
            st.write("➡️ " + i[0], "{}%".format(int(i[1] * 100)))
        if check_source:
            with st.spinner("⌛사용 목적 태그 생성중..."):
                # `source` comes from `load_data import *` — presumably the
                # candidate labels for usage/source tagging; verify in load_data.
                source_result = get_taggs(user_input, source, thred=0)
            st.subheader("🔍혹시 이 사용목적의 문장인가요? : ")
            for i in source_result[:3]:
                st.write("🏷️ " + i[0], "{}%".format(int(i[1] * 100)))
else:
    # ----- bulk file-upload mode: expects a 'text' column -----
    st.write("🔍컬럼명을 'text'로 설정해, 파일을 업로드해주세요!")
    filetype = st.checkbox("👈Using CSV? (체크하지 않으면 xlsx 사용): ")
    uploaded_file = st.file_uploader("Choose an csv file")
    if uploaded_file is not None:
        df = transform_data(uploaded_file, filetype)
        st.write(df)
        thred = st.slider(
            "👇태그 생성 thredhold 설정. 결과가 나오지 않을경우, threshold를 0에 가깝게 낮추세요!",
            0.0,
            1.0,
            0.5,
            step=0.01,
        )
        if thred:
            st.write(thred, " 이상의 confidence level인 태그만 생성합니다.")
        maximum = st.number_input("👇최대 태그 갯수 설정", 0, 10, 5, step=1)
        st.write("최대 {} 개의 태그 생성".format(maximum))
        check_source = st.checkbox("🏷️용처 / 출처 태그 생성")
        submit = st.button("👈클릭해서 태그 생성")
        if submit:
            with st.spinner("⌛태그를 생성하는 중입니다..."):
                df["generated_tag"] = df["text"].progress_apply(
                    lambda x: get_taggs(x, candidate_labels, thred)[:maximum]
                )
            if check_source:
                with st.spinner("⌛사용 목적 태그 생성중..."):
                    df["source"] = df["text"].progress_apply(
                        lambda x: get_taggs(x, source, thred=0)
                    )
            csv = convert_df(df)
            # Build a {text: {tags...}} preview of the results.
            to_json = {}
            for idx, row in df.iterrows():
                to_json[row.text] = {}
                to_json[row.text]["generated_tag"] = row.generated_tag
                # BUG FIX: the original read `row.source` unconditionally,
                # raising AttributeError when the "source" column was never
                # created (check_source unchecked).
                if check_source:
                    to_json[row.text]["source"] = row.source
            st.download_button(
                "Press to Download",
                csv,
                "file.csv",
                "text/csv",
                key='download-csv'
            )
            st.write("🔔Outcome: ")
            st.write(to_json)