import re

import streamlit as st
from pandas import DataFrame
import seaborn as sns

from model import ArxivClassifierModelsPipeline

st.markdown("# Hello, friend!")
st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")

model = ArxivClassifierModelsPipeline()
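# The pipeline exposes make_predict(), used below to classify the paper's title and abstract.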

with st.form(key="my_form"):
    st.markdown("### 🎈 Do you want a little magic?  ")
    st.markdown(" Write your article title and abstract to textboxes bellow and I'll gues topic of your paper!  ")
    ce, c2, c3 = st.columns([0.07, 5, 0.07])

    with c2:
        doc_title = st.text_area(
            "Paste your paper's title below (max 50 words)",
            height=210,
        )

        doc_abstract = st.text_area(
            "Paste your paper's abstract below (max 500 words)",
            height=410,
        )

        MAX_WORDS_TITLE, MAX_WORDS_ABSTRACT = 50, 500

        title_words = re.findall(r"\w+", doc_title)
        abstract_words = re.findall(r"\w+", doc_abstract)

        if len(title_words) > MAX_WORDS_TITLE:
            st.warning(
                f"⚠️ Your title contains {len(title_words)} words. "
                f"Only the first {MAX_WORDS_TITLE} words will be reviewed. "
                "Stay tuned, an increased allowance is coming! 😊"
            )
            # Truncate to the first MAX_WORDS_TITLE words.
            doc_title = " ".join(title_words[:MAX_WORDS_TITLE])

        if len(abstract_words) > MAX_WORDS_ABSTRACT:
            st.warning(
                f"⚠️ Your abstract contains {len(abstract_words)} words. "
                f"Only the first {MAX_WORDS_ABSTRACT} words will be reviewed. "
                "Stay tuned, an increased allowance is coming! 😊"
            )
            # Truncate to the first MAX_WORDS_ABSTRACT words.
            doc_abstract = " ".join(abstract_words[:MAX_WORDS_ABSTRACT])

        submit_button = st.form_submit_button(label="✨ Let's play, try it!")

if not submit_button:
    st.stop()
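# Everything below runs only after the form has been submitted.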


title = doc_title
abstract = doc_abstract
# try:
#     tokens = tokenizer_(title + abstract, return_tensors="pt")
# except ValueError:
#     st.error("Word parsing into tokens went wrong! Is input valid? If yes, pls contact author alekseystepin13@gmail.com")

preds_topic, preds_maintopic = model.make_predict(title + " " + abstract)
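# preds_topic maps each candidate topic to its probability; preds_maintopic is the single most likely topic.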

st.markdown("## 🎈 Yor article probably about:  ")
st.header("")

df = (
    DataFrame(preds_topic.items(), columns=["Topic", "Probability"])
        .sort_values(by="Probability", ascending=False)
        .reset_index(drop=True)
)
df.index += 1
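# Rank topics starting from 1 in the displayed table.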


# Add styling
cmGreen = sns.light_palette("green", as_cmap=True)
cmRed = sns.light_palette("red", as_cmap=True)
df = df.style.background_gradient(
    cmap=cmGreen,
    subset=[
        "Probability",
    ],
)
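# A stronger green background marks higher-probability topics in the rendered table.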

c1, c2, c3 = st.columns([1, 3, 1])

format_dictionary = {
    "Probability": "{:.1%}",
}

df = df.format(format_dictionary)

with c2:
    st.markdown("#### We suppose your research about:  ")
    st.markdown(f"### {preds_maintopic}! ")
    st.markdown(f"Wow, we're impressed, are you addicted to {preds_maintopic.lower()}?! Coool! ")
    st.markdown("##### More detailed, it's about topic:  ")
    st.table(df)