File size: 3,181 Bytes
51caef2
ccc3eca
51caef2
ccc3eca
 
 
 
 
 
c1ddc17
ccc3eca
 
 
c1ddc17
ccc3eca
 
84b6cc7
ccc3eca
84b6cc7
 
ccc3eca
84b6cc7
 
 
 
ccc3eca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e18cd1b
ccc3eca
e18cd1b
ccc3eca
 
 
 
e18cd1b
ccc3eca
d846800
ccc3eca
 
 
 
 
 
 
 
 
 
 
 
 
 
e18cd1b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import streamlit as st
from transformers import pipeline, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM, AutoTokenizer

class CombinedModel:
    """Pair a sequence-classification model with a seq2seq summarization model.

    The two ``transformers`` pipelines are created the first time they are
    needed and then reused, so repeated calls to ``classify_and_summarize``
    do not rebuild them for every text.
    """

    def __init__(self, classifier_model, classifier_tokenizer, summarizer_model, summarizer_tokenizer):
        """Store the models and tokenizers used to build the two pipelines.

        Args:
            classifier_model: Model passed to the "text-classification" pipeline.
            classifier_tokenizer: Tokenizer passed to the same pipeline.
            summarizer_model: Model passed to the "summarization" pipeline.
            summarizer_tokenizer: Tokenizer passed to the same pipeline.
        """
        self.classifier_model = classifier_model
        self.classifier_tokenizer = classifier_tokenizer
        self.summarizer_model = summarizer_model
        self.summarizer_tokenizer = summarizer_tokenizer
        # Filled in lazily by _get_classifier / _get_summarizer.
        self._classifier = None
        self._summarizer = None

    def _get_classifier(self):
        """Return the text-classification pipeline, building it on first use."""
        # getattr keeps this working for instances created without __init__.
        if getattr(self, "_classifier", None) is None:
            # NOTE(review): newer transformers releases reportedly deprecate
            # return_all_scores in favour of top_k=None, but the output nesting
            # differs -- confirm against the installed version before switching.
            self._classifier = pipeline(
                "text-classification",
                model=self.classifier_model,
                tokenizer=self.classifier_tokenizer,
                return_all_scores=True,
            )
        return self._classifier

    def _get_summarizer(self):
        """Return the summarization pipeline, building it on first use."""
        if getattr(self, "_summarizer", None) is None:
            self._summarizer = pipeline(
                "summarization",
                model=self.summarizer_model,
                tokenizer=self.summarizer_tokenizer,
            )
        return self._summarizer

    def classify_and_summarize(self, text, *, max_length=50, min_length=25):
        """Classify ``text`` and summarize it.

        Args:
            text: The text handed to both pipelines.
            max_length: ``max_length`` forwarded to the summarization pipeline.
            min_length: ``min_length`` forwarded to the summarization pipeline.

        Returns:
            A ``(label, score, summary)`` tuple: the label with the highest
            score, that score, and the ``summary_text`` of the first summary.
            If the classifier reports no labels, the label is ``''`` and the
            score is ``float('-inf')``.
        """
        # The first element holds the per-label score dicts for this text.
        label_scores = self._get_classifier()(text)[0]

        # max() keeps the first entry on ties, like a strict ">" scan would.
        best = max(label_scores, key=lambda entry: entry['score'], default=None)
        if best is None:
            max_label, max_score = '', float('-inf')
        else:
            max_label, max_score = best['label'], best['score']

        summary_results = self._get_summarizer()(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
        )

        return max_label, max_score, summary_results[0]['summary_text']

    @classmethod
    def from_pretrained(cls, classifier_path, summarizer_path):
        """Build a ``CombinedModel`` by loading both models and tokenizers.

        Args:
            classifier_path: Identifier passed to
                ``AutoModelForSequenceClassification.from_pretrained`` and
                ``AutoTokenizer.from_pretrained``.
            summarizer_path: Identifier passed to
                ``AutoModelForSeq2SeqLM.from_pretrained`` and
                ``AutoTokenizer.from_pretrained``.

        Returns:
            A new instance of ``cls`` wrapping the loaded objects.
        """
        classifier_model = AutoModelForSequenceClassification.from_pretrained(classifier_path)
        classifier_tokenizer = AutoTokenizer.from_pretrained(classifier_path)
        summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(summarizer_path)
        summarizer_tokenizer = AutoTokenizer.from_pretrained(summarizer_path)
        return cls(classifier_model, classifier_tokenizer, summarizer_model, summarizer_tokenizer)

# Load the combined model
# Identifiers handed to the from_pretrained loaders (presumably Hugging Face
# Hub repository ids -- confirm).
classifier_path = "lqqqqqqqqq/FinetunedModelGr9"
summarizer_path = "lqqqqqqqqq/SummarizeModelGr9"


def _load_combined_model(classifier_repo, summarizer_repo):
    """Load the classifier and summarizer and wrap them in a CombinedModel."""
    return CombinedModel.from_pretrained(classifier_repo, summarizer_repo)


# Streamlit re-executes this script on every widget interaction; caching the
# loader keeps the models from being reloaded on each rerun. Streamlit builds
# without st.cache_resource fall back to the plain, uncached load.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_combined_model = _cache_resource(_load_combined_model)

combined_model = _load_combined_model(classifier_path, summarizer_path)

# Streamlit application title
st.title("Twitter Text Classification and Summarization")
st.write("Classification for 3 labels: negative, neutral, positive")
st.markdown("Please enter at least 50 words & more than 4 sentences for better summarization")

# Text input for user to enter the text to classify
texts_input = st.text_area("Enter the texts to classify and summarize (one text per line)", "")


# Perform text classification and summarization when the user clicks the
# "Classify & Summarize" button
if st.button("Classify & Summarize"):
    # One text per line; blank lines are dropped up front so that the prompt
    # below is shown once, and only when there is nothing to process.
    stripped_lines = (line.strip() for line in texts_input.split('\n'))
    texts = [line for line in stripped_lines if line]

    if not texts:
        st.write("Please enter some text(more than 4 sentences) to classify and summarize.")

    for text in texts:
        # Classify and summarize the input text
        label, score, summary = combined_model.classify_and_summarize(text)

        # Display the results
        st.write("Text:", text)
        st.write("Label:", label)
        st.write("Score:", score)
        st.write("Summary:", summary)
        st.write("---")