File size: 5,530 Bytes
4a26e59
3454d4e
23189bc
3454d4e
 
 
 
 
 
 
 
 
 
 
 
4a26e59
23189bc
4a26e59
 
 
 
 
 
 
 
 
 
 
 
3454d4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a26e59
 
3454d4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae23c23
 
 
 
 
 
 
 
 
3454d4e
 
25a1096
 
 
 
 
 
 
ae23c23
3454d4e
 
25a1096
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import torch
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering

# --- Page chrome -----------------------------------------------------------
# set_page_config is kept as the very first Streamlit call of the script.
PAGE_TITLE = "Common NLP Tasks"
st.set_page_config(page_title=PAGE_TITLE)
st.title(PAGE_TITLE)
st.subheader("Use the menu on the left to select a NLP task to do (click on > if closed).")

# --- Sidebar ----------------------------------------------------------------
# A collapsible "About" blurb, then the radio selector whose value (`option`)
# decides which task page is rendered further down in the script.
sidebar = st.sidebar
expander = sidebar.expander('About')
expander.write("This web app allows you to perform common Natural Language Processing tasks, select a task below to get started.")

sidebar.header('What will you like to do?')
TASKS = [
    'Extractive question answering',
    'Text summarization',
    'Text generation',
    'Sentiment analysis',
]
option = sidebar.radio('', TASKS)

# Checkpoint used for extractive QA and the longest token sequence fed to it.
_QA_MODEL_NAME = "deepset/roberta-base-squad2"
_QA_MAX_TOKENS = 512


@st.cache(show_spinner=False, allow_output_mutation=True)
def _load_question_answering_model():
    """Load the QA tokenizer and model once and let Streamlit cache them.

    Kept separate from ``question_answerer`` because that function is cached
    per (context, question) pair: loading the weights there would repeat the
    load for every new question.
    """
    tokenizer = AutoTokenizer.from_pretrained(_QA_MODEL_NAME)
    model = AutoModelForQuestionAnswering.from_pretrained(_QA_MODEL_NAME)
    return tokenizer, model


@st.cache(show_spinner=False, allow_output_mutation=True)
def question_answerer(context, question):
    """Return the span of ``context`` that best answers ``question``.

    Args:
        context: Text in which to look for the answer.
        question: Question to answer from ``context``.

    Returns:
        The answer as a string. It is empty when the predicted end does not
        come after the predicted start, or when the selected span holds only
        special tokens.
    """
    tokenizer, model = _load_question_answering_model()
    # Truncate only the context (second sequence) so inputs longer than the
    # model window do not fail; text past the limit is not searched.
    inputs = tokenizer(
        question,
        context,
        add_special_tokens=True,
        return_tensors="pt",
        truncation="only_second",
        max_length=_QA_MAX_TOKENS,
    )
    input_ids = inputs["input_ids"].tolist()[0]

    # Pure inference: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Most likely start of the answer, and most likely end (+1 because the
    # slice below is end-exclusive).
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1

    # Drop special tokens so a "no answer" prediction pointing at the leading
    # special token yields an empty string rather than the raw token text.
    answer_tokens = tokenizer.convert_ids_to_tokens(
        input_ids[answer_start:answer_end], skip_special_tokens=True
    )
    return tokenizer.convert_tokens_to_string(answer_tokens)

@st.cache(show_spinner=False, allow_output_mutation=True)
def summarization_model():
    """Create the ``"summarization"`` pipeline; the result is cached by ``st.cache``."""
    return pipeline("summarization")

@st.cache(show_spinner=False, allow_output_mutation=True)
def generation_model():
    """Create the ``"text-generation"`` pipeline; the result is cached by ``st.cache``."""
    return pipeline("text-generation")

@st.cache(show_spinner=False, allow_output_mutation=True)
def sentiment_model():
    """Create the ``"sentiment-analysis"`` pipeline; the result is cached by ``st.cache``."""
    return pipeline("sentiment-analysis")

# --- Task pages ---------------------------------------------------------------
# Render the page for the task chosen in the sidebar (`option`). Each page
# collects its input, waits for the action button, then runs the matching model
# and writes the result.
if option == 'Extractive question answering':
    st.markdown("<h2 style='text-align: center; color:red;'>Extract answer from text</h2>", unsafe_allow_html=True)
    sample_text = "sample text"
    source = st.radio("How would you like to start? Choose an option below", ["I want to input some text", "I want to upload a file"])
    if source == "I want to input some text":
        context = st.text_area('Use the example below or input your own text in English (between 1,000 and 10,000 characters)', value=sample_text, max_chars=10000, height=330)
        question = st.text_input(label='Enter your question')
        button = st.button('Get answer')
        if button:
            with st.spinner(text="Getting answer..."):
                answer = question_answerer(context, question)
                st.write(answer)
    elif source == "I want to upload a file":
        uploaded_file = st.file_uploader("Choose a .txt file to upload", type=["txt"])
        question = st.text_input(label='Enter your question')
        button = st.button('Get answer')
        if button:
            if uploaded_file is None:
                # Nothing to read yet: tell the user instead of failing.
                st.warning("Please upload a .txt file first.")
            else:
                # The uploader yields bytes; decode them into the context text.
                context = uploaded_file.getvalue().decode("utf-8", errors="replace")
                with st.spinner(text="Getting answer..."):
                    # question_answerer returns the answer string itself.
                    answer = question_answerer(context, question)
                    st.write(answer)

elif option == 'Text summarization':
    st.markdown("<h2 style='text-align: center; color:red;'>Summarize text</h2>", unsafe_allow_html=True)
    sample_text = "sample text"
    source = st.radio("How would you like to start? Choose an option below", ["I want to input some text", "I want to upload a file"])
    if source == "I want to input some text":
        text = st.text_area('Input a text in English (between 1,000 and 10,000 characters)', value=sample_text, max_chars=10000, height=330)
        button = st.button('Get summary')
        if button:
            summarizer = summarization_model()
            with st.spinner(text="Summarizing text..."):
                summary = summarizer(text, max_length=130, min_length=30)
                st.write(summary)

    elif source == "I want to upload a file":
        uploaded_file = st.file_uploader("Choose a .txt file to upload", type=["txt"])
        button = st.button('Get summary')
        if button:
            if uploaded_file is None:
                # Nothing to read yet: tell the user instead of failing.
                st.warning("Please upload a .txt file first.")
            else:
                # The uploader yields bytes; decode them into the text to summarize.
                text = uploaded_file.getvalue().decode("utf-8", errors="replace")
                summarizer = summarization_model()
                with st.spinner(text="Summarizing text..."):
                    summary = summarizer(text, max_length=130, min_length=30)
                    st.write(summary)

elif option == 'Text generation':
    st.markdown("<h2 style='text-align: center; color:grey;'>Generate text</h2>", unsafe_allow_html=True)
    text = st.text_input(label='Enter one line of text and let the NLP model generate the rest for you')
    button = st.button('Generate text')
    if button:
        generator = generation_model()
        with st.spinner(text="Generating text..."):
            generated_text = generator(text, max_length=50)
            st.write(generated_text[0]["generated_text"])

elif option == 'Sentiment analysis':
    st.markdown("<h2 style='text-align: center; color:grey;'>Classify review</h2>", unsafe_allow_html=True)
    text = st.text_input(label='Enter a sentence to get its sentiment analysis')
    button = st.button('Get sentiment analysis')
    if button:
        sentiment_analysis = sentiment_model()
        with st.spinner(text="Getting sentiment analysis..."):
            sentiment = sentiment_analysis(text)
            st.write(sentiment[0]["label"])