Spaces:

greco
/

survey_analytics_spaces

Runtime error

App Files Community

greco committed on Jul 18, 2022

Commit

0c0c0b8

1 Parent(s): 9485a72

update library

Browse files

Files changed (1) hide show

survey_analytics_library.py +0 -150

survey_analytics_library.py CHANGED Viewed

@@ -1,20 +1,7 @@
 # imports
 import pandas as pd
-import numpy as np
-import streamlit as st
-from tqdm.notebook import tqdm
-import matplotlib.pyplot as plt
-import plotly.express as px
-from sklearn.cluster import KMeans
-from sklearn.metrics import silhouette_score
-import zipfile
-from xml.etree.cElementTree import XML
 import re
-from nltk.corpus import stopwords
@@ -82,143 +69,6 @@ def clean_text(text_string, list_of_replacements, lowercase=True, ignorecase=Fal
-# remove stopwords from tokens
-def remove_stopwords(tokens, language='english'):
-    '''
-    remove stopwords from tokens using list comprehension
-    default to using english stopwords
-    arguments:
-    tokens (list): list of tokens, output of word_tokenize()
-    language (str): default to english
-    returns:
-    a list of tokens without stopwords
-    '''
-    # define stopwords and store as a set
-    stopwords_set = set(stopwords.words(language))
-    # check if word is in list of stopwords
-    # returns a list of words not found in list of stopwords
-    stopwords_removed = [word for word in tokens if word not in stopwords_set]
-    # return
-    return stopwords_removed
-import itertools
-from typing import List
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-def visualize_barchart_titles(topic_model,
-                       topics: List[int] = None,
-                       subplot_titles: List[str] = None,
-                       top_n_topics: int = 8,
-                       n_words: int = 5,
-                       width: int = 250,
-                       height: int = 250) -> go.Figure:
-    """ Visualize a barchart of selected topics
-    Arguments:
-        topic_model: A fitted BERTopic instance.
-        topics: A selection of topics to visualize.
-        top_n_topics: Only select the top n most frequent topics.
-        n_words: Number of words to show in a topic
-        width: The width of each figure.
-        height: The height of each figure.
-    Returns:
-        fig: A plotly figure
-    Usage:
-    To visualize the barchart of selected topics
-    simply run:
-    ```python
-    topic_model.visualize_barchart()
-    ```
-    Or if you want to save the resulting figure:
-    ```python
-    fig = topic_model.visualize_barchart()
-    fig.write_html("path/to/file.html")
-    ```
-    <iframe src="../../getting_started/visualization/bar_chart.html"
-    style="width:1100px; height: 660px; border: 0px;""></iframe>
-    """
-    colors = itertools.cycle(["#D55E00", "#0072B2", "#CC79A7", "#E69F00", "#56B4E9", "#009E73", "#F0E442"])
-    # Select topics based on top_n and topics args
-    freq_df = topic_model.get_topic_freq()
-    freq_df = freq_df.loc[freq_df.Topic != -1, :]
-    if topics is not None:
-        topics = list(topics)
-    elif top_n_topics is not None:
-        topics = sorted(freq_df.Topic.to_list()[:top_n_topics])
-    else:
-        topics = sorted(freq_df.Topic.to_list()[0:6])
-    # Initialize figure
-    if subplot_titles is None:
-        subplot_titles = [f"Topic {topic}" for topic in topics]
-    else:
-        subplot_titles = subplot_titles
-    columns = 4
-    rows = int(np.ceil(len(topics) / columns))
-    fig = make_subplots(rows=rows,
-                        cols=columns,
-                        shared_xaxes=False,
-                        horizontal_spacing=.1,
-                        vertical_spacing=.4 / rows if rows > 1 else 0,
-                        subplot_titles=subplot_titles)
-    # Add barchart for each topic
-    row = 1
-    column = 1
-    for topic in topics:
-        words = [word + "  " for word, _ in topic_model.get_topic(topic)][:n_words][::-1]
-        scores = [score for _, score in topic_model.get_topic(topic)][:n_words][::-1]
-        fig.add_trace(
-            go.Bar(x=scores,
-                   y=words,
-                   orientation='h',
-                   marker_color=next(colors)),
-            row=row, col=column)
-        if column == columns:
-            column = 1
-            row += 1
-        else:
-            column += 1
-    # Stylize graph
-    fig.update_layout(
-        template="plotly_white",
-        showlegend=False,
-        title={
-            'text': "<b>Topic Word Scores",
-            'x': .5,
-            'xanchor': 'center',
-            'yanchor': 'top',
-            'font': dict(
-                size=22,
-                color="Black")
-        },
-        width=width*4,
-        height=height*rows if rows > 1 else height * 1.3,
-        hoverlabel=dict(
-            bgcolor="white",
-            font_size=16,
-            font_family="Rockwell"
-        ),
-    )
-    fig.update_xaxes(showgrid=True)
-    fig.update_yaxes(showgrid=True)
-    return fig
 # convert transformer model zero shot classification prediction into dataframe
 def convert_zero_shot_classification_output_to_dataframe(model_output):

 # imports
 import pandas as pd
 import re
 # convert transformer model zero shot classification prediction into dataframe
 def convert_zero_shot_classification_output_to_dataframe(model_output):