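# NOTE: The commented-out block below is the original Process Fest pipeline:
# it split the input text into sub-sentences, then ran topic, sentiment,
# intent, entity, and keyword models over each piece.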
# import nltk
# import math
# import torch
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
# from nltk.tokenize import word_tokenize, sent_tokenize
# from nltk.corpus import stopwords
# from collections import Counter
# from flair.data import Sentence
# from flair.models import SequenceTagger
# nltk.download('stopwords')
# nltk.download('punkt')
# import streamlit as st
# st.set_page_config(layout="wide")
# def divide_sentence(sentence):
#     # Split on common conjunctions (plus 'the' and 'i' as rough clause markers)
#     conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless", "the", "i"]
#     tokens = nltk.word_tokenize(sentence)
#     subsentences = []
#     current_subsentence = []
#     for token in tokens:
#         if token.lower() in conjunctions:
#             # Flush the buffered tokens as one sub-sentence
#             if len(current_subsentence) > 0:
#                 subsentences.append(" ".join(current_subsentence))
#                 current_subsentence = []
#         else:
#             current_subsentence.append(token)
#     # Add the final subsentence to the list
#     subsentences.append(" ".join(current_subsentence))
#     # e.g. "The food was good but the service was bad"
#     #      -> ["food was good", "service was bad"]
#     return subsentences
# def topic_identify(subsentences):
#     def sigmoid(x):
#         return 1 / (1 + math.exp(-x))
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
#     model.eval()
#     class_mapping = model.config.id2label
#     topics = []
#     for text in subsentences:
#         with torch.no_grad():
#             tokens = tokenizer(text, return_tensors='pt')
#             output = model(**tokens)
#         # Multi-label classification: keep every label whose sigmoid score exceeds 0.5
#         flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
#         topic = [class_mapping[n] for n, i in enumerate(flags) if i]
#         topics.append(','.join(topic))
#     return topics
# def sentiment_score(subsentences):
#     from transformers import pipeline
#     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
#     sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
#     senti = []
#     for sen in subsentences:
#         # e.g. [{'label': 'positive', 'score': 0.9484752416610718}]
#         a = sentiment_task(sen)[0]
#         senti.append(a['label'] + ' , ' + str(a['score']))
#     return senti
# def intent_identify(subsentences):
#     model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForSequenceClassification.from_pretrained(model_name)
#     classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
#     intents = []
#     for s in subsentences:
#         a = classifier(s)[0]
#         intents.append(a['label'] + ' , ' + str(a['score']))
#     return intents
# def entity_identify(subsentences):
#     # Load the Flair NER tagger
#     tagger = SequenceTagger.load('ner')
#     entities = []
#     for sentence in subsentences:
#         sentence = Sentence(sentence)
#         # Run NER on the sentence and collect the entity spans it finds
#         tagger.predict(sentence)
#         ent = [entity.text for entity in sentence.get_spans('ner')]
#         entities.append(','.join(ent))
#     return entities
# def keyword_identify(subsentences):
#     class KeywordExtractor:
#         def __init__(self):
#             self.stop_words = set(stopwords.words('english'))
#         def extract_keywords(self, text):
#             # Tokenize sentences, then words; drop stop words and non-alphabetic tokens
#             sentences = sent_tokenize(text)
#             words = [word.lower() for sentence in sentences for word in word_tokenize(sentence)
#                      if word.lower() not in self.stop_words and word.isalpha()]
#             # Count word frequencies and sort most-frequent first
#             word_freq = Counter(words)
#             sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
#             # Return the top 2 keywords
#             return [word[0] for word in sorted_words[:2]]
#     key = KeywordExtractor()
#     keywords = []
#     for s in subsentences:
#         keyword = key.extract_keywords(s)
#         keywords.append(','.join(keyword))
#     return keywords
# st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
# import pandas as pd
# import numpy as np
# sent = st.text_input(label='Enter the Text:')
# button = st.button('submit')
# # sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I'll raise a complaint against AAA"
# if button:
#     subsentences = divide_sentence(sent)
#     topic = topic_identify(subsentences)  # computed but not shown in the table below
#     sentiment = sentiment_score(subsentences)
#     intent = intent_identify(subsentences)
#     entity = entity_identify(subsentences)
#     keyword = keyword_identify(subsentences)
#     df = pd.DataFrame({
#         'subsentences': subsentences,
#         'sentiment and score': sentiment,
#         'intent': intent,
#         'entity': entity,
#         'keyword': keyword,
#     })
#     st.dataframe(data=df, width=None, height=None, use_container_width=False)
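# Active app below: a minimal Streamlit demo with a title, a name input,
# and two sliders whose values and sum are shown in a one-row DataFrame.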
import streamlit as st
import pandas as pd

st.title("A Simple Streamlit Web App")

# Greet the user by the name they type in
name = st.text_input("Enter your name", "")
st.write(f"Hello {name}!")

# Two integer sliders, range 0-10, default value 1
x = st.slider("Select an integer x", 0, 10, 1)
y = st.slider("Select an integer y", 0, 10, 1)

# Show the inputs and their sum as a one-row table
df = pd.DataFrame({"x": [x], "y": [y], "x + y": [x + y]}, index=["addition row"])
st.write(df)
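# To run locally (assuming Streamlit is installed): streamlit run app.py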