import re

import streamlit as st
from pandas import DataFrame
import seaborn as sns

from model import ArxivClassifierModelsPipeline

st.markdown("# Hello, friend!")
st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")

model = ArxivClassifierModelsPipeline()
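# The pipeline exposes make_predict(), used below to classify the paper's title and abstract.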

with st.form(key="my_form"):
    st.markdown("### 🎈 Do you want a little magic?  ")
    st.markdown(" Write your article title and abstract to textboxes bellow and I'll gues topic of your paper!  ")
    ce, c2, c3 = st.columns([0.07, 5, 0.07])

    with c2:
        doc_title = st.text_area(
            "Paste your paper's title below (max 50 words)",
            height=210,
        )

        doc_abstract = st.text_area(
            "Paste your paper's abstract below (max 500 words)",
            height=410,
        )

        MAX_WORDS_TITLE, MAX_WORDS_ABSTRACT = 50, 500

        title_words = re.findall(r"\w+", doc_title)
        abstract_words = re.findall(r"\w+", doc_abstract)

        if len(title_words) > MAX_WORDS_TITLE:
            st.warning(
                f"⚠️ Your title contains {len(title_words)} words. "
                f"Only the first {MAX_WORDS_TITLE} words will be reviewed. "
                "Stay tuned, an increased allowance is coming! 😊"
            )
            # Truncate to the first MAX_WORDS_TITLE words.
            doc_title = " ".join(title_words[:MAX_WORDS_TITLE])

        if len(abstract_words) > MAX_WORDS_ABSTRACT:
            st.warning(
                f"⚠️ Your abstract contains {len(abstract_words)} words. "
                f"Only the first {MAX_WORDS_ABSTRACT} words will be reviewed. "
                "Stay tuned, an increased allowance is coming! 😊"
            )
            # Truncate to the first MAX_WORDS_ABSTRACT words.
            doc_abstract = " ".join(abstract_words[:MAX_WORDS_ABSTRACT])

        submit_button = st.form_submit_button(label="✨ Let's play, try it!")

if not submit_button:
    st.stop()
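# Everything below runs only after the form has been submitted.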


title = doc_title
abstract = doc_abstract
# try:
#     tokens = tokenizer_(title + abstract, return_tensors="pt")
# except ValueError:
#     st.error("Word parsing into tokens went wrong! Is input valid? If yes, pls contact author alekseystepin13@gmail.com")

preds_topic, preds_maintopic = model.make_predict(title + " " + abstract)
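# preds_topic maps each candidate topic to its probability; preds_maintopic is the single most likely topic.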

st.markdown("## 🎈 Yor article probably about:  ")
st.header("")

df = (
    DataFrame(preds_topic.items(), columns=["Topic", "Probability"])
        .sort_values(by="Probability", ascending=False)
        .reset_index(drop=True)
)
df.index += 1
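# Rank topics starting from 1 in the displayed table.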


# Add styling
cmGreen = sns.light_palette("green", as_cmap=True)
cmRed = sns.light_palette("red", as_cmap=True)
df = df.style.background_gradient(
    cmap=cmGreen,
    subset=[
        "Probability",
    ],
)
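# A stronger green background marks higher-probability topics in the rendered table.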

c1, c2, c3 = st.columns([1, 3, 1])

format_dictionary = {
    "Probability": "{:.1%}",
}

df = df.format(format_dictionary)

with c2:
    st.markdown("#### We suppose your research about:  ")
    st.markdown(f"### {preds_maintopic}! ")
    st.markdown(f"Wow, we're impressed, are you addicted to {preds_maintopic.lower()}?! Coool! ")
    st.markdown("##### More detailed, it's about topic:  ")
    st.table(df)