dipannitaray committed on
Commit
3e7ec50
1 Parent(s): 8b6f8d6

Upload 4 files

Browse files
Files changed (4) hide show
  1. sentiback2.jpg +0 -0
  2. sentiback3.jpg +0 -0
  3. sr.py +98 -0
  4. tokenizer_and_sequences.pkl +3 -0
sentiback2.jpg ADDED
sentiback3.jpg ADDED
sr.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from tensorflow.keras.preprocessing.text import Tokenizer
5
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
6
+ from tensorflow.keras.models import load_model
7
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForSeq2SeqLM
8
+ import torch
9
+ import pickle
10
+ import joblib
11
+
12
# Load models and tokenizers
# NOTE(review): Streamlit re-executes the script on each widget interaction,
# so every load below is repeated per rerun -- consider caching them.
SENTIMENT_CHECKPOINT = 'punjabiSentimentAnalysis'
SUMMARIZER_CHECKPOINT = "ai4bharat/MultiIndicSentenceSummarizationSS"

# Keras network that process_input() calls .predict() on.
model = load_model('rnn_lstm_final.h5')
# NOTE(review): loaded here but not referenced again in the visible script.
loaded_model = joblib.load("my_rnn_model.joblib")

# The pickle stores a (tokenizer, data) pair. pickle.load can execute
# arbitrary code, so this file must come from a trusted source.
with open("tokenizer_and_sequences.pkl", "rb") as pickle_file:
    tokenizer, data = pickle.load(pickle_file)

# Transformer classifier and its tokenizer, passed to predict_sentiment().
model1 = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)
tokenizer1 = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)

# Seq2seq summarizer and its (slow, accent-preserving, case-preserving) tokenizer.
model_summ = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_CHECKPOINT)
tokenizer_summ = AutoTokenizer.from_pretrained(
    SUMMARIZER_CHECKPOINT,
    do_lower_case=False,
    use_fast=False,
    keep_accents=True,
)

# Ids of the special tokens handed to generate() in summarize().
# NOTE(review): relies on a private tokenizer method -- may break on upgrade.
bos_id, eos_id, pad_id = (
    tokenizer_summ._convert_token_to_id_with_added_voc(special_token)
    for special_token in ("<s>", "</s>", "<pad>")
)
28
+
29
+ # Define helper functions
30
def is_valid_punjabi_text(text):
    """Return True when *text* is non-blank and has no ASCII letters, digits or punctuation.

    This is a negative filter only: it rejects text containing any character
    from the English alphabet, the digits 0-9, or ASCII punctuation. It does
    not verify that the remaining characters are actually Gurmukhi.

    Args:
        text: The user-supplied string to check.

    Returns:
        False for empty / whitespace-only input or when any disallowed ASCII
        character is present; True otherwise.
    """
    # Blank input carries nothing to analyse, so treat it as invalid instead
    # of letting it fall through to the models.
    if not text or not text.strip():
        return False

    disallowed = frozenset(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789"
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
    )
    # Valid only if no character of the text is in the disallowed set.
    return disallowed.isdisjoint(text)
39
+
40
def predict_sentiment(text, model, tokenizer):
    """Classify *text* as "Negative" or "Positive" with a sequence classifier.

    Args:
        text: The string to classify.
        model: Callable accepting the tokenizer output as keyword arguments
            and returning an object with a ``logits`` tensor.
        tokenizer: Callable turning *text* into the model's keyword inputs.

    Returns:
        "Negative" when the highest-scoring class index is 0, otherwise
        "Positive".
    """
    # truncation=True keeps over-long input within the tokenizer's configured
    # maximum length instead of overflowing the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()
    return "Negative" if predicted_class == 0 else "Positive"
45
+
46
def summarize(text):
    """Produce a short summary of *text* with the module-level seq2seq model.

    The input is suffixed with " </s> <2pa>" before tokenization and the
    decoder is started from the "<2pa>" token id (presumably the Punjabi
    language tag of the checkpoint -- confirm against the model card).
    Generation uses 5-beam search capped at 20 tokens.

    Args:
        text: The source text to summarize.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    prompt = f"{text} </s> <2pa>"
    encoded = tokenizer_summ(
        prompt,
        add_special_tokens=False,
        return_tensors="pt",
        padding=True,
    )
    source_ids = encoded.input_ids

    # Looked up through the tokenizer's private added-vocabulary helper.
    target_lang_id = tokenizer_summ._convert_token_to_id_with_added_voc("<2pa>")

    generation_options = dict(
        use_cache=True,
        no_repeat_ngram_size=3,
        num_beams=5,
        length_penalty=0.8,
        max_length=20,
        min_length=1,
        early_stopping=True,
        pad_token_id=pad_id,
        bos_token_id=bos_id,
        eos_token_id=eos_id,
        decoder_start_token_id=target_lang_id,
    )
    generated = model_summ.generate(source_ids, **generation_options)

    return tokenizer_summ.decode(
        generated[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
56
+
57
def process_input(text):
    """Classify *text* with the module-level Keras model.

    The text is converted to an integer sequence by the pickled tokenizer,
    post-padded to length 100, given a trailing axis of size 1 and passed to
    ``model.predict``.

    Args:
        text: The string to classify.

    Returns:
        "Negative" when the first output score exceeds the second,
        otherwise "Positive".
    """
    sequences = np.array(tokenizer.texts_to_sequences([text]))
    padded = pad_sequences(sequences, padding='post', maxlen=100)
    # Add a trailing feature axis: (batch, 100) -> (batch, 100, 1).
    model_input = padded.reshape((padded.shape[0], padded.shape[1], 1))
    scores = model.predict(model_input)

    # Only one text is submitted, so the last row is the row of interest.
    final_row = scores[-1]
    negative_score = final_row[0]
    positive_score = final_row[1]
    if negative_score > positive_score:
        return "Negative"
    return "Positive"
68
+
69
# Streamlit app
st.title("Indic Sentence Summarization & Sentiment Analysis")
st.header("Insightful Echoes: Crafting Summaries with Sentiments (for ਪੰਜਾਬੀ Text)")

model_choice = st.selectbox("Select the Model", ["Indic-Bert", "RNN"])
summarize_before_sentiment = st.checkbox("Summarize before analyzing sentiment")
user_input = st.text_area("Enter some text here")

if st.button("Analyze Sentiment"):
    if not is_valid_punjabi_text(user_input):
        st.warning("Please enter valid Punjabi text.")
    else:
        # Summarize at most once per click; the result is reused by every
        # section below instead of re-running beam search.
        summarized_text = summarize(user_input) if summarize_before_sentiment else None

        # The transformer classifier sees the summary when one was requested,
        # otherwise the raw input.
        bert_source = summarized_text if summarize_before_sentiment else user_input
        sentiment_bert = predict_sentiment(bert_source, model1, tokenizer1)

        report_lines = [f'Sentiment (Indic-BERT): {sentiment_bert}']
        if summarize_before_sentiment:
            report_lines.append(f'Summary: {summarized_text}')

        if model_choice == "RNN":
            # NOTE(review): the LSTM is scored on the full input even when
            # summarization is ticked, and the Indic-BERT result is shown for
            # both model choices -- confirm this is intended.
            sentiment_rnn = process_input(user_input)
            report_lines.append(f"Sentiment (Bidirectional LSTM): {sentiment_rnn}")

            if summarize_before_sentiment:
                report_lines.append(f"Summary (Bidirectional LSTM): {summarized_text}")

        sentiment_output = "\n".join(report_lines)
        st.text_area("Sentiment Output", sentiment_output, height=200)
tokenizer_and_sequences.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8802bb9e970ab9643357f0b384773dc4a2dd7514a396a3898c5e7903a563e36f
3
+ size 613474