# Spaces:
# Runtime error
# Runtime error
# import nltk | |
# import math | |
# import torch | |
# # from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
# # from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline | |
# from nltk.tokenize import word_tokenize, sent_tokenize | |
# from nltk.corpus import stopwords | |
# from collections import Counter | |
# from flair.data import Sentence | |
# from flair.models import SequenceTagger | |
# nltk.download('stopwords') | |
# nltk.download('punkt') | |
# import streamlit as st | |
# st.set_page_config(layout="wide") | |
# def divide_sentence(sentence): | |
# conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless",'the','i'] | |
# tokens = nltk.word_tokenize(sentence) | |
# subsentences = [] | |
# current_subsentence = [] | |
# for token in tokens: | |
# if token.lower() in conjunctions: | |
# if len(current_subsentence)>0: | |
# subsentences.append(" ".join(current_subsentence)) | |
# current_subsentence = [] | |
# else: | |
# current_subsentence.append(token) | |
# # Add the final subsentence to the list | |
# subsentences.append(" ".join(current_subsentence)) | |
# # print(subsentences) | |
# # d={} | |
# # for s in subsentences: | |
# # d[s] = {'accuracy':None,} | |
# return subsentences | |
# def topic_identify(subsentences): | |
# def sigmoid(x): | |
# return 1 / (1 + math.exp(-x)) | |
# tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all") | |
# model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification") | |
# model.eval() | |
# class_mapping = model.config.id2label | |
# topics = [] | |
# for text in subsentences: | |
# with torch.no_grad(): | |
# tokens = tokenizer(text, return_tensors='pt') | |
# output = model(**tokens) | |
# flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()] | |
# topic = [class_mapping[n] for n, i in enumerate(flags) if i] | |
# topics.append(','.join(topic)) | |
# return topics | |
# def sentiment_score(subsentences): | |
# tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest") | |
# model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest") | |
# from transformers import pipeline | |
# sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer) | |
# senti = [] | |
# for sen in subsentences: | |
# a=sentiment_task(sen) | |
# # [{'label': 'positive', 'score': 0.9484752416610718}] | |
# a=a[0] | |
# senti.append(a['label']+' , '+str(a['score'])) | |
# return senti | |
# def intent_identify(subsentences): | |
# model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en' | |
# tokenizer = AutoTokenizer.from_pretrained(model_name) | |
# model = AutoModelForSequenceClassification.from_pretrained(model_name) | |
# classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer) | |
# intents = [] | |
# for s in subsentences: | |
# res = classifier(s) | |
# a=res[0] | |
# intents.append(a['label']+' , '+str(a['score'])) | |
# return intents | |
# def entity_identify(subsentences): | |
# # load the NER tagger | |
# tagger = SequenceTagger.load('ner') | |
# # create a sentence to analyze | |
# entities = [] | |
# for sentence in subsentences: | |
# sentence = Sentence(sentence) | |
# # run NER on the sentence | |
# tagger.predict(sentence) | |
# # print the entities found in the sentence | |
# ent = [] | |
# for entity in sentence.get_spans('ner'): | |
# ent.append(entity.text) | |
# entities.append(','.join(ent)) | |
# return entities | |
# def keyword_identify(subsentences): | |
# class KeywordExtractor: | |
# def __init__(self): | |
# self.stop_words = set(stopwords.words('english')) | |
# def extract_keywords(self, text): | |
# # tokenize sentences | |
# sentences = sent_tokenize(text) | |
# # tokenize words and remove stop words | |
# words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()] | |
# # count word frequencies | |
# word_freq = Counter(words) | |
# # sort words by frequency | |
# sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True) | |
# # return top 2 keywords | |
# return [word[0] for word in sorted_words[:2]] | |
# key = KeywordExtractor() | |
# keywords=[] | |
# for s in subsentences: | |
# keyword = key.extract_keywords(s) | |
# keywords.append(','.join(keyword)) | |
# return keywords | |
# st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True) | |
# import pandas as pd | |
# import numpy as np | |
# sent = st.text_input(label = 'Enter the Text:') | |
# button = st.button('submit') | |
# #sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA" | |
# if button: | |
# subsentences = divide_sentence(sent) | |
# topic = topic_identify(subsentences) | |
# sentiment = sentiment_score(subsentences) | |
# intent = intent_identify(subsentences) | |
# entity = entity_identify(subsentences) | |
# keyword = keyword_identify(subsentences) | |
# df = pd.DataFrame( | |
# { | |
# 'subsentences': subsentences, | |
# 'sentiment and score': sentiment, | |
# 'intent': intent, | |
# 'entity' : entity, | |
# 'keyword' : keyword | |
# }) | |
# st.dataframe(data=df, width=None, height=None,use_container_width=False) | |
import streamlit as st | |
import pandas as pd | |
# Minimal Streamlit demo: greet the user by name and show the sum of two
# slider values in a one-row table.
# NOTE: string delimiters must be plain ASCII quotes; typographic ("smart")
# quotes are not valid Python string delimiters and raise a SyntaxError.
st.title("A Simple Streamlit Web App")

# Free-text input; the second positional argument is the initial value (empty).
name = st.text_input("Enter your name", "")
st.write(f"Hello {name}!")

# Integer sliders; positional arguments are label, min, max, initial value.
x = st.slider("Select an integer x", 0, 10, 1)
y = st.slider("Select an integer y", 0, 10, 1)

# Single-row frame holding both operands and their sum.
df = pd.DataFrame(
    {"x": [x], "y": [y], "x + y": [x + y]},
    index=["addition row"],
)
st.write(df)