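# NOTE: The commented-out block below is the original Process Fest pipeline:
# it split the input text into sub-sentences, then ran topic, sentiment,
# intent, entity, and keyword models over each piece.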
# import nltk
# import math
# import torch
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
# from nltk.tokenize import word_tokenize, sent_tokenize
# from nltk.corpus import stopwords
# from collections import Counter
# from flair.data import Sentence
# from flair.models import SequenceTagger
# nltk.download('stopwords')
# nltk.download('punkt')
# import streamlit as st
# st.set_page_config(layout="wide")
# def divide_sentence(sentence):
#     # Split on common conjunctions (plus 'the' and 'i' as rough clause markers)
#     conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless", "the", "i"]
#     tokens = nltk.word_tokenize(sentence)
#     subsentences = []
#     current_subsentence = []
#     for token in tokens:
#         if token.lower() in conjunctions:
#             # Flush the buffered tokens as one sub-sentence
#             if len(current_subsentence) > 0:
#                 subsentences.append(" ".join(current_subsentence))
#                 current_subsentence = []
#         else:
#             current_subsentence.append(token)
#     # Add the final subsentence to the list
#     subsentences.append(" ".join(current_subsentence))
#     # e.g. "The food was good but the service was bad"
#     #      -> ["food was good", "service was bad"]
#     return subsentences
# def topic_identify(subsentences):
#     def sigmoid(x):
#         return 1 / (1 + math.exp(-x))
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
#     model.eval()
#     class_mapping = model.config.id2label
#     topics = []
#     for text in subsentences:
#         with torch.no_grad():
#             tokens = tokenizer(text, return_tensors='pt')
#             output = model(**tokens)
#         # Multi-label classification: keep every label whose sigmoid score exceeds 0.5
#         flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
#         topic = [class_mapping[n] for n, i in enumerate(flags) if i]
#         topics.append(','.join(topic))
#     return topics
# def sentiment_score(subsentences):
#     from transformers import pipeline
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
#     senti = []
#     for sen in subsentences:
#         # e.g. [{'label': 'positive', 'score': 0.9484752416610718}]
#         a = sentiment_task(sen)[0]
#         senti.append(a['label'] + ' , ' + str(a['score']))
#     return senti
# def intent_identify(subsentences):
#     model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForSequenceClassification.from_pretrained(model_name)
#     classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
#     intents = []
#     for s in subsentences:
#         a = classifier(s)[0]
#         intents.append(a['label'] + ' , ' + str(a['score']))
#     return intents
# def entity_identify(subsentences):
#     # Load the Flair NER tagger
#     tagger = SequenceTagger.load('ner')
#     entities = []
#     for sentence in subsentences:
#         sentence = Sentence(sentence)
#         # Run NER on the sentence and collect the entity spans it finds
#         tagger.predict(sentence)
#         ent = [entity.text for entity in sentence.get_spans('ner')]
#         entities.append(','.join(ent))
#     return entities
# def keyword_identify(subsentences):
#     class KeywordExtractor:
#         def __init__(self):
#             self.stop_words = set(stopwords.words('english'))
#         def extract_keywords(self, text):
#             # Tokenize sentences, then words; drop stop words and non-alphabetic tokens
#             sentences = sent_tokenize(text)
#             words = [word.lower() for sentence in sentences for word in word_tokenize(sentence)
#                      if word.lower() not in self.stop_words and word.isalpha()]
#             # Count word frequencies and sort most-frequent first
#             word_freq = Counter(words)
#             sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
#             # Return the top 2 keywords
#             return [word[0] for word in sorted_words[:2]]
#     key = KeywordExtractor()
#     keywords = []
#     for s in subsentences:
#         keyword = key.extract_keywords(s)
#         keywords.append(','.join(keyword))
#     return keywords
# st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
# import pandas as pd
# import numpy as np
# sent = st.text_input(label='Enter the Text:')
# button = st.button('submit')
# # sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I'll raise a complaint against AAA"
# if button:
#     subsentences = divide_sentence(sent)
#     topic = topic_identify(subsentences)  # computed but not shown in the table below
#     sentiment = sentiment_score(subsentences)
#     intent = intent_identify(subsentences)
#     entity = entity_identify(subsentences)
#     keyword = keyword_identify(subsentences)
#     df = pd.DataFrame({
#         'subsentences': subsentences,
#         'sentiment and score': sentiment,
#         'intent': intent,
#         'entity': entity,
#         'keyword': keyword,
#     })
#     st.dataframe(data=df, width=None, height=None, use_container_width=False)
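# Active app below: a minimal Streamlit demo with a title, a name input,
# and two sliders whose values and sum are shown in a one-row DataFrame.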
import streamlit as st
import pandas as pd

st.title("A Simple Streamlit Web App")

# Greet the user by the name they type in
name = st.text_input("Enter your name", "")
st.write(f"Hello {name}!")

# Two integer sliders, range 0-10, default value 1
x = st.slider("Select an integer x", 0, 10, 1)
y = st.slider("Select an integer y", 0, 10, 1)

# Show the inputs and their sum as a one-row table
df = pd.DataFrame({"x": [x], "y": [y], "x + y": [x + y]}, index=["addition row"])
st.write(df)
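# To run locally (assuming Streamlit is installed): streamlit run app.py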