# import nltk
# import math
# import torch
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
# from nltk.tokenize import word_tokenize, sent_tokenize
# from nltk.corpus import stopwords
# from collections import Counter
# from flair.data import Sentence
# from flair.models import SequenceTagger
# nltk.download('stopwords')
# nltk.download('punkt')
# import streamlit as st

# st.set_page_config(layout="wide")



# def divide_sentence(sentence):
#     # Split markers: real conjunctions plus "the" and "i", used here as extra break points
#     conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless", "the", "i"]
#     tokens = nltk.word_tokenize(sentence)
#     subsentences = []
#     current_subsentence = []
#     for token in tokens:
#         if token.lower() in conjunctions:
#             if current_subsentence:
#                 subsentences.append(" ".join(current_subsentence))
#             current_subsentence = []
#         else:
#             current_subsentence.append(token)
#     # Add the final subsentence (skip if empty, e.g. when the text ends on a split word)
#     if current_subsentence:
#         subsentences.append(" ".join(current_subsentence))
#     return subsentences
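# # Illustrative usage (assuming NLTK's default word tokenizer; output traced
# # by hand, not from a test run):
# #   divide_sentence("the food was good but the service was bad")
# #   -> ["food was good", "service was bad"]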



# def topic_identify(subsentences):
#     def sigmoid(x):
#         return 1 / (1 + math.exp(-x))
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
#     model.eval()
#     class_mapping = model.config.id2label
#     topics = []
#     for text in subsentences:
#         with torch.no_grad():
#             tokens = tokenizer(text, return_tensors='pt')
#             output = model(**tokens)
#             flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
#             topic = [class_mapping[n] for n, i in enumerate(flags) if i]
#         topics.append(','.join(topic))
#     return topics
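# # Assumed output shape: one comma-joined label string per subsentence. The
# # label names come from the tweet-topic model's id2label mapping, so the
# # example below is illustrative, not verified:
# #   topic_identify(["the food was great"]) -> ["food_&_dining"]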


# def sentiment_score(subsentences):
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     from transformers import pipeline
#     sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
#     senti = []
#     for sen in subsentences:
#         a = sentiment_task(sen)[0]
#         # e.g. {'label': 'positive', 'score': 0.9484752416610718}
#         senti.append(a['label'] + ' , ' + str(a['score']))
#     return senti
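# # Each returned entry is therefore "<label> , <score>", e.g.
# # "positive , 0.9484752416610718" for a clearly positive subsentence.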



# def intent_identify(subsentences):
#     model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForSequenceClassification.from_pretrained(model_name)
#     classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
#     intents = []
#     for s in subsentences:
#         res = classifier(s)
#         a = res[0]
#         intents.append(a['label'] + ' , ' + str(a['score']))
#     return intents
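# # Output mirrors sentiment_score: "<label> , <score>" per subsentence. The
# # label set is whatever the FedCSIS intent model was trained on (assumption:
# # its id2label config, not verified here).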



# def entity_identify(subsentences):
#     # load the NER tagger
#     tagger = SequenceTagger.load('ner')
#     # create a sentence to analyze
#     entities = []
#     for sentence in subsentences:
#         sentence = Sentence(sentence)
#         # run NER on the sentence
#         tagger.predict(sentence)
#         # print the entities found in the sentence
#         ent = []
#         for entity in sentence.get_spans('ner'):
#             ent.append(entity.text)
#         entities.append(','.join(ent))
#     return entities
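# # Illustrative only (model predictions are not guaranteed): for
# # ["I prefer BBB than AAA"], Flair's 'ner' tagger would typically detect the
# # name-like spans, giving ["BBB,AAA"] after the comma join above.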



# def keyword_identify(subsentences):
#     class KeywordExtractor:
#         def __init__(self):
#             self.stop_words = set(stopwords.words('english'))
#         def extract_keywords(self, text):
#             # tokenize sentences
#             sentences = sent_tokenize(text)
#             # tokenize words and remove stop words
#             words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
#             # count word frequencies
#             word_freq = Counter(words)
#             # sort words by frequency
#             sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
#             # return top 2 keywords
#             return [word[0] for word in sorted_words[:2]]
#     key = KeywordExtractor()
#     keywords=[]
#     for s in subsentences:
#         keyword = key.extract_keywords(s)
#         keywords.append(','.join(keyword))
#     return keywords
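# # Worked example (assuming NLTK's standard English stopword list):
# #   keyword_identify(["the food was good food"]) -> ["food,good"]
# #   ('the'/'was' are stopwords; 'food' appears twice, so it ranks first)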

# import pandas as pd

# st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
# sent = st.text_input(label='Enter the Text:')
# button = st.button('submit')
# #sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
# if button:
#     subsentences = divide_sentence(sent)
#     topic  = topic_identify(subsentences)
#     sentiment = sentiment_score(subsentences)
#     intent = intent_identify(subsentences)
#     entity = entity_identify(subsentences)
#     keyword = keyword_identify(subsentences)
#     df = pd.DataFrame(
#        {
#         'subsentences': subsentences,
#         'sentiment and score': sentiment,
#         'intent': intent,
#         'entity' : entity,
#         'keyword' : keyword
#        })
#     st.dataframe(data=df, width=None, height=None, use_container_width=False)

import streamlit as st
import pandas as pd

st.title("A Simple Streamlit Web App")

name = st.text_input("Enter your name", "")
st.write(f"Hello {name}!")

x = st.slider("Select an integer x", 0, 10, 1)
y = st.slider("Select an integer y", 0, 10, 1)

# Show both selections and their sum in a one-row table
df = pd.DataFrame({"x": [x], "y": [y], "x + y": [x + y]}, index=["addition row"])
st.write(df)
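
# To run this app locally (assuming the file is saved as app.py):
#   streamlit run app.py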