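# Previous version of the app (kept commented out below): it splits the input text into
# sub-sentences, then runs topic, sentiment, intent, entity, and keyword models on each piece.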
# import nltk
# import math
# import torch
# # from transformers import AutoModelForSequenceClassification, AutoTokenizer
# # from transformers import AutoTokenizer, AutoModelForSequenceClassification
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
# from nltk.tokenize import word_tokenize, sent_tokenize
# from nltk.corpus import stopwords
# from collections import Counter
# from flair.data import Sentence
# from flair.models import SequenceTagger
# nltk.download('stopwords')
# nltk.download('punkt')
# import streamlit as st
# st.set_page_config(layout="wide")
# def divide_sentence(sentence):
#     conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless", 'the', 'i']
#     tokens = nltk.word_tokenize(sentence)
#     subsentences = []
#     current_subsentence = []
#     for token in tokens:
#         if token.lower() in conjunctions:
#             if len(current_subsentence) > 0:
#                 subsentences.append(" ".join(current_subsentence))
#                 current_subsentence = []
#         else:
#             current_subsentence.append(token)
#     # Add the final subsentence to the list
#     subsentences.append(" ".join(current_subsentence))
#     # print(subsentences)
#     # d={}
#     # for s in subsentences:
#     #     d[s] = {'accuracy':None,}
#     return subsentences
# def topic_identify(subsentences):
#     def sigmoid(x):
#         return 1 / (1 + math.exp(-x))
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
#     model.eval()
#     class_mapping = model.config.id2label
#     topics = []
#     for text in subsentences:
#         with torch.no_grad():
#             tokens = tokenizer(text, return_tensors='pt')
#             output = model(**tokens)
#         flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
#         topic = [class_mapping[n] for n, i in enumerate(flags) if i]
#         topics.append(','.join(topic))
#     return topics
# def sentiment_score(subsentences):
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     from transformers import pipeline
#     sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
#     senti = []
#     for sen in subsentences:
#         a = sentiment_task(sen)
#         # [{'label': 'positive', 'score': 0.9484752416610718}]
#         a = a[0]
#         senti.append(a['label'] + ' , ' + str(a['score']))
#     return senti
# def intent_identify(subsentences):
#     model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForSequenceClassification.from_pretrained(model_name)
#     classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
#     intents = []
#     for s in subsentences:
#         res = classifier(s)
#         a = res[0]
#         intents.append(a['label'] + ' , ' + str(a['score']))
#     return intents
# def entity_identify(subsentences):
#     # load the NER tagger
#     tagger = SequenceTagger.load('ner')
#     entities = []
#     for sentence in subsentences:
#         # create a sentence to analyze
#         sentence = Sentence(sentence)
#         # run NER on the sentence
#         tagger.predict(sentence)
#         # collect the entities found in the sentence
#         ent = []
#         for entity in sentence.get_spans('ner'):
#             ent.append(entity.text)
#         entities.append(','.join(ent))
#     return entities
# def keyword_identify(subsentences):
#     class KeywordExtractor:
#         def __init__(self):
#             self.stop_words = set(stopwords.words('english'))
#         def extract_keywords(self, text):
#             # tokenize sentences
#             sentences = sent_tokenize(text)
#             # tokenize words and remove stop words
#             words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
#             # count word frequencies
#             word_freq = Counter(words)
#             # sort words by frequency
#             sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
#             # return the top two keywords
#             return [word[0] for word in sorted_words[:2]]
#     key = KeywordExtractor()
#     keywords = []
#     for s in subsentences:
#         keyword = key.extract_keywords(s)
#         keywords.append(','.join(keyword))
#     return keywords
# st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
# import pandas as pd
# import numpy as np
# sent = st.text_input(label = 'Enter the Text:')
# button = st.button('submit')
# #sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
# if button:
#     subsentences = divide_sentence(sent)
#     topic = topic_identify(subsentences)
#     sentiment = sentiment_score(subsentences)
#     intent = intent_identify(subsentences)
#     entity = entity_identify(subsentences)
#     keyword = keyword_identify(subsentences)
#     df = pd.DataFrame(
#         {
#             'subsentences': subsentences,
#             'sentiment and score': sentiment,
#             'intent': intent,
#             'entity': entity,
#             'keyword': keyword
#         })
#     st.dataframe(data=df, width=None, height=None, use_container_width=False)
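# Active app: a minimal Streamlit demo with a text input, two sliders, and a DataFrame output.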
import streamlit as st
import pandas as pd
st.title("A Simple Streamlit Web App")

# Collect the user's name and greet them
name = st.text_input("Enter your name", "")
st.write(f"Hello {name}!")

# Two integer sliders in the range 0..10, both defaulting to 1
x = st.slider("Select an integer x", 0, 10, 1)
y = st.slider("Select an integer y", 0, 10, 1)

# Show x, y, and their sum as a one-row DataFrame
df = pd.DataFrame({"x": [x], "y": [y], "x + y": [x + y]}, index=["addition row"])
st.write(df)
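# To run locally (assuming this file is saved as app.py and Streamlit is installed):
#   streamlit run app.py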