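# Previous version of the app (kept commented out below): it splits the input text into
# sub-sentences, then runs topic, sentiment, intent, entity, and keyword models on each piece.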
# import nltk
# import math
# import torch
# # from transformers import AutoModelForSequenceClassification, AutoTokenizer
# # from transformers import AutoTokenizer, AutoModelForSequenceClassification
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
# from nltk.tokenize import word_tokenize, sent_tokenize
# from nltk.corpus import stopwords
# from collections import Counter
# from flair.data import Sentence
# from flair.models import SequenceTagger
# nltk.download('stopwords')
# nltk.download('punkt')
# import streamlit as st
# st.set_page_config(layout="wide")
# def divide_sentence(sentence):
#     conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless", 'the', 'i']
#     tokens = nltk.word_tokenize(sentence)
#     subsentences = []
#     current_subsentence = []
#     for token in tokens:
#         if token.lower() in conjunctions:
#             if len(current_subsentence) > 0:
#                 subsentences.append(" ".join(current_subsentence))
#                 current_subsentence = []
#         else:
#             current_subsentence.append(token)
#     # Add the final subsentence to the list
#     subsentences.append(" ".join(current_subsentence))
#     # print(subsentences)
#     # d={}
#     # for s in subsentences:
#     #     d[s] = {'accuracy':None,}
#     return subsentences
# def topic_identify(subsentences):
#     def sigmoid(x):
#         return 1 / (1 + math.exp(-x))
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
#     model.eval()
#     class_mapping = model.config.id2label
#     topics = []
#     for text in subsentences:
#         with torch.no_grad():
#             tokens = tokenizer(text, return_tensors='pt')
#             output = model(**tokens)
#         flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
#         topic = [class_mapping[n] for n, i in enumerate(flags) if i]
#         topics.append(','.join(topic))
#     return topics
# def sentiment_score(subsentences):
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     from transformers import pipeline
#     sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
#     senti = []
#     for sen in subsentences:
#         a = sentiment_task(sen)
#         # [{'label': 'positive', 'score': 0.9484752416610718}]
#         a = a[0]
#         senti.append(a['label'] + ' , ' + str(a['score']))
#     return senti
# def intent_identify(subsentences):
#     model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForSequenceClassification.from_pretrained(model_name)
#     classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
#     intents = []
#     for s in subsentences:
#         res = classifier(s)
#         a = res[0]
#         intents.append(a['label'] + ' , ' + str(a['score']))
#     return intents
# def entity_identify(subsentences):
#     # load the NER tagger
#     tagger = SequenceTagger.load('ner')
#     entities = []
#     for sentence in subsentences:
#         # create a sentence to analyze
#         sentence = Sentence(sentence)
#         # run NER on the sentence
#         tagger.predict(sentence)
#         # collect the entities found in the sentence
#         ent = []
#         for entity in sentence.get_spans('ner'):
#             ent.append(entity.text)
#         entities.append(','.join(ent))
#     return entities
# def keyword_identify(subsentences):
#     class KeywordExtractor:
#         def __init__(self):
#             self.stop_words = set(stopwords.words('english'))
#         def extract_keywords(self, text):
#             # tokenize sentences
#             sentences = sent_tokenize(text)
#             # tokenize words and remove stop words
#             words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
#             # count word frequencies
#             word_freq = Counter(words)
#             # sort words by frequency
#             sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
#             # return the top two keywords
#             return [word[0] for word in sorted_words[:2]]
#     key = KeywordExtractor()
#     keywords = []
#     for s in subsentences:
#         keyword = key.extract_keywords(s)
#         keywords.append(','.join(keyword))
#     return keywords
# st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
# import pandas as pd
# import numpy as np
# sent = st.text_input(label = 'Enter the Text:')
# button = st.button('submit')
# #sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
# if button:
#     subsentences = divide_sentence(sent)
#     topic = topic_identify(subsentences)
#     sentiment = sentiment_score(subsentences)
#     intent = intent_identify(subsentences)
#     entity = entity_identify(subsentences)
#     keyword = keyword_identify(subsentences)
#     df = pd.DataFrame(
#         {
#             'subsentences': subsentences,
#             'sentiment and score': sentiment,
#             'intent': intent,
#             'entity': entity,
#             'keyword': keyword
#         })
#     st.dataframe(data=df, width=None, height=None, use_container_width=False)
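# Active app: a minimal Streamlit demo with a text input, two sliders, and a DataFrame output.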
import streamlit as st
import pandas as pd
st.title("A Simple Streamlit Web App")

# Collect the user's name and greet them
name = st.text_input("Enter your name", "")
st.write(f"Hello {name}!")

# Two integer sliders in the range 0..10, both defaulting to 1
x = st.slider("Select an integer x", 0, 10, 1)
y = st.slider("Select an integer y", 0, 10, 1)

# Show x, y, and their sum as a one-row DataFrame
df = pd.DataFrame({"x": [x], "y": [y], "x + y": [x + y]}, index=["addition row"])
st.write(df)
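# To run locally (assuming this file is saved as app.py and Streamlit is installed):
#   streamlit run app.py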