pranjal065 committed on
Commit
282bb56
1 Parent(s): 15dd55e

Update app.py

Files changed (1):
  app.py +156 -159
app.py CHANGED
@@ -1,161 +1,158 @@
- # -*- coding: utf-8 -*-
- """Untitled3.ipynb
-
- Automatically generated by Colaboratory.
-
- Original file is located at
-     https://colab.research.google.com/drive/18DTgeDomshKNQMgYQ6y6mJbBom9mRw5l
- """
-
- # Commented out IPython magic to ensure Python compatibility.
- # %%writefile app.py
- # %%writefile 'app.py'
- import nltk
- import math
- import torch
- # from transformers import AutoModelForSequenceClassification, AutoTokenizer
- # from transformers import AutoTokenizer, AutoModelForSequenceClassification
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
- from nltk.tokenize import word_tokenize, sent_tokenize
- from nltk.corpus import stopwords
- from collections import Counter
- from flair.data import Sentence
- from flair.models import SequenceTagger
- nltk.download('stopwords')
- nltk.download('punkt')
- import streamlit as st
-
- st.set_page_config(layout="wide")
-
-
- def divide_sentence(sentence):
-     conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless", 'the', 'i']
-     tokens = nltk.word_tokenize(sentence)
-     subsentences = []
-     current_subsentence = []
-     for token in tokens:
-         if token.lower() in conjunctions:
-             if len(current_subsentence) > 0:
-                 subsentences.append(" ".join(current_subsentence))
-                 current_subsentence = []
-         else:
-             current_subsentence.append(token)
-     # Add the final subsentence to the list
-     subsentences.append(" ".join(current_subsentence))
-     # print(subsentences)
-     # d = {}
      # for s in subsentences:
-     #     d[s] = {'accuracy': None,}
-     return subsentences
-
-
- def topic_identify(subsentences):
-     def sigmoid(x):
-         return 1 / (1 + math.exp(-x))
-     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
-     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
-     model.eval()
-     class_mapping = model.config.id2label
-     topics = []
-     for text in subsentences:
-         with torch.no_grad():
-             tokens = tokenizer(text, return_tensors='pt')
-             output = model(**tokens)
-         flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
-         topic = [class_mapping[n] for n, i in enumerate(flags) if i]
-         topics.append(','.join(topic))
-     return topics
-
-
- def sentiment_score(subsentences):
-     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
-     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
-     from transformers import pipeline
-     sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
-     senti = []
-     for sen in subsentences:
-         a = sentiment_task(sen)
-         # e.g. [{'label': 'positive', 'score': 0.9484752416610718}]
-         a = a[0]
-         senti.append(a['label'] + ' , ' + str(a['score']))
-     return senti
-
-
- def intent_identify(subsentences):
-     model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForSequenceClassification.from_pretrained(model_name)
-     classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
-     intents = []
-     for s in subsentences:
-         res = classifier(s)
-         a = res[0]
-         intents.append(a['label'] + ' , ' + str(a['score']))
-     return intents
-
-
- def entity_identify(subsentences):
-     # load the NER tagger
-     tagger = SequenceTagger.load('ner')
-     entities = []
-     for sentence in subsentences:
-         sentence = Sentence(sentence)
-         # run NER on the sentence
-         tagger.predict(sentence)
-         # collect the entities found in the sentence
-         ent = []
-         for entity in sentence.get_spans('ner'):
-             ent.append(entity.text)
-         entities.append(','.join(ent))
-     return entities
-
-
- def keyword_identify(subsentences):
-     class KeywordExtractor:
-         def __init__(self):
-             self.stop_words = set(stopwords.words('english'))
-         def extract_keywords(self, text):
-             # tokenize sentences
-             sentences = sent_tokenize(text)
-             # tokenize words and remove stop words
-             words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
-             # count word frequencies
-             word_freq = Counter(words)
-             # sort words by frequency
-             sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
-             # return the top 2 keywords
-             return [word[0] for word in sorted_words[:2]]
-     key = KeywordExtractor()
-     keywords = []
-     for s in subsentences:
-         keyword = key.extract_keywords(s)
-         keywords.append(','.join(keyword))
-     return keywords
- st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
  import pandas as pd
- import numpy as np
- sent = st.text_input(label='Enter the Text:')
- button = st.button('submit')
- # sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
- if button:
-     subsentences = divide_sentence(sent)
-     topic = topic_identify(subsentences)
-     sentiment = sentiment_score(subsentences)
-     intent = intent_identify(subsentences)
-     entity = entity_identify(subsentences)
-     keyword = keyword_identify(subsentences)
-     df = pd.DataFrame(
-         {
-             'subsentences': subsentences,
-             'sentiment and score': sentiment,
-             'intent': intent,
-             'entity': entity,
-             'keyword': keyword
-         })
-     st.dataframe(data=df, width=None, height=None, use_container_width=False)
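For context on what is being removed: the clause-splitting idea behind divide_sentence can be tried standalone. A minimal sketch, simplified to whitespace tokenization so it runs without the NLTK punkt download (the committed version uses nltk.word_tokenize); the function name here is my own:

def split_on_conjunctions(sentence):
    # Break-word list copied from the removed code; note 'the' and 'i'
    # are used as extra break words even though they are not conjunctions.
    conjunctions = {"and", "but", "or", "however", "therefore",
                    "furthermore", "nevertheless", "the", "i"}
    subsentences, current = [], []
    for token in sentence.split():  # simplified; original uses nltk.word_tokenize
        if token.lower() in conjunctions:
            if current:
                subsentences.append(" ".join(current))
                current = []
        else:
            current.append(token)
    subsentences.append(" ".join(current))  # add the final chunk
    return subsentences

# Shortened form of the sample review in the file's commented-out test line:
print(split_on_conjunctions(
    "The stay at AAA was good The food was not that bad but the service was very bad"))
# ['stay at AAA was good', 'food was not that bad', 'service was very bad']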
 
+ # import nltk
+ # import math
+ # import torch
+ # # from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ # # from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ # from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
+ # from nltk.tokenize import word_tokenize, sent_tokenize
+ # from nltk.corpus import stopwords
+ # from collections import Counter
+ # from flair.data import Sentence
+ # from flair.models import SequenceTagger
+ # nltk.download('stopwords')
+ # nltk.download('punkt')
+ # import streamlit as st
+
+ # st.set_page_config(layout="wide")
+
+
+ # def divide_sentence(sentence):
+ #     conjunctions = ["and", "but", "or", "however", "therefore", "furthermore", "nevertheless", 'the', 'i']
+ #     tokens = nltk.word_tokenize(sentence)
+ #     subsentences = []
+ #     current_subsentence = []
+ #     for token in tokens:
+ #         if token.lower() in conjunctions:
+ #             if len(current_subsentence) > 0:
+ #                 subsentences.append(" ".join(current_subsentence))
+ #                 current_subsentence = []
+ #         else:
+ #             current_subsentence.append(token)
+ #     # Add the final subsentence to the list
+ #     subsentences.append(" ".join(current_subsentence))
+ #     # print(subsentences)
+ #     # d = {}
+ #     # for s in subsentences:
+ #     #     d[s] = {'accuracy': None,}
+ #     return subsentences
+
+
+ # def topic_identify(subsentences):
+ #     def sigmoid(x):
+ #         return 1 / (1 + math.exp(-x))
+ #     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all")
+ #     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-dec2021-tweet-topic-multi-all", problem_type="multi_label_classification")
+ #     model.eval()
+ #     class_mapping = model.config.id2label
+ #     topics = []
+ #     for text in subsentences:
+ #         with torch.no_grad():
+ #             tokens = tokenizer(text, return_tensors='pt')
+ #             output = model(**tokens)
+ #         flags = [sigmoid(s) > 0.5 for s in output[0][0].detach().tolist()]
+ #         topic = [class_mapping[n] for n, i in enumerate(flags) if i]
+ #         topics.append(','.join(topic))
+ #     return topics
+
+
+ # def sentiment_score(subsentences):
+ #     tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
+ #     model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
+ #     from transformers import pipeline
+ #     sentiment_task = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
+ #     senti = []
+ #     for sen in subsentences:
+ #         a = sentiment_task(sen)
+ #         # e.g. [{'label': 'positive', 'score': 0.9484752416610718}]
+ #         a = a[0]
+ #         senti.append(a['label'] + ' , ' + str(a['score']))
+ #     return senti
+
+
+ # def intent_identify(subsentences):
+ #     model_name = 'cartesinus/fedcsis-intent_baseline-xlm_r-en'
+ #     tokenizer = AutoTokenizer.from_pretrained(model_name)
+ #     model = AutoModelForSequenceClassification.from_pretrained(model_name)
+ #     classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+ #     intents = []
  # for s in subsentences:
+ #         res = classifier(s)
+ #         a = res[0]
+ #         intents.append(a['label'] + ' , ' + str(a['score']))
+ #     return intents
+
+
+ # def entity_identify(subsentences):
+ #     # load the NER tagger
+ #     tagger = SequenceTagger.load('ner')
+ #     entities = []
+ #     for sentence in subsentences:
+ #         sentence = Sentence(sentence)
+ #         # run NER on the sentence
+ #         tagger.predict(sentence)
+ #         # collect the entities found in the sentence
+ #         ent = []
+ #         for entity in sentence.get_spans('ner'):
+ #             ent.append(entity.text)
+ #         entities.append(','.join(ent))
+ #     return entities
+
+
+ # def keyword_identify(subsentences):
+ #     class KeywordExtractor:
+ #         def __init__(self):
+ #             self.stop_words = set(stopwords.words('english'))
+ #         def extract_keywords(self, text):
+ #             # tokenize sentences
+ #             sentences = sent_tokenize(text)
+ #             # tokenize words and remove stop words
+ #             words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.lower() not in self.stop_words and word.isalpha()]
+ #             # count word frequencies
+ #             word_freq = Counter(words)
+ #             # sort words by frequency
+ #             sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
+ #             # return the top 2 keywords
+ #             return [word[0] for word in sorted_words[:2]]
+ #     key = KeywordExtractor()
+ #     keywords = []
+ #     for s in subsentences:
+ #         keyword = key.extract_keywords(s)
+ #         keywords.append(','.join(keyword))
+ #     return keywords
+ # st.markdown("<h1 style='text-align: center; color: white; background : grey'>Process Fest</h1>", unsafe_allow_html=True)
+ # import pandas as pd
+ # import numpy as np
+ # sent = st.text_input(label='Enter the Text:')
+ # button = st.button('submit')
+ # # sent = "The stay at AAA was good The food was not that bad but the service was very bad and I prefer BBB than AAA I’ll raise a complaint against AAA"
+ # if button:
+ #     subsentences = divide_sentence(sent)
+ #     topic = topic_identify(subsentences)
+ #     sentiment = sentiment_score(subsentences)
+ #     intent = intent_identify(subsentences)
+ #     entity = entity_identify(subsentences)
+ #     keyword = keyword_identify(subsentences)
+ #     df = pd.DataFrame(
+ #         {
+ #             'subsentences': subsentences,
+ #             'sentiment and score': sentiment,
+ #             'intent': intent,
+ #             'entity': entity,
+ #             'keyword': keyword
+ #         })
+ #     st.dataframe(data=df, width=None, height=None, use_container_width=False)
+ import streamlit as st
  import pandas as pd
+ st.title("A Simple Streamlit Web App")
+ name = st.text_input("Enter your name", '')
+ st.write(f"Hello {name}!")
+ x = st.slider("Select an integer x", 0, 10, 1)
+ y = st.slider("Select an integer y", 0, 10, 1)
+ df = pd.DataFrame({"x": [x], "y": [y], "x + y": [x + y]}, index=["addition row"])
+ st.write(df)
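After this change the Space is a small slider demo: with streamlit and pandas installed, it would typically be started locally with "streamlit run app.py". A minimal sketch of the table the new code builds, with hypothetical values standing in for the st.slider selections:

import pandas as pd

# Hypothetical slider selections; in the app, x and y come from st.slider.
x, y = 3, 4
df = pd.DataFrame({"x": [x], "y": [y], "x + y": [x + y]}, index=["addition row"])
print(df)
# prints something like:
#               x  y  x + y
# addition row  3  4      7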