import pandas as pd
import numpy as np
import requests
from sentence_transformers import SentenceTransformer, util

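# Shared sentence-embedding model, loaded once at import time and reused for every dataset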
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

def load_csv(projJson, index, projectData, projectUUID):
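    """Load a project dataset, normalise its text column, and make sure embeddings exist.

    Returns the dataframe, the intro HTML, a 30-row text preview, the formatted
    intro, the resolved dataset name, and an HTML label for the dataset selector.
    """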
    outName = "uploader/uploads/" + projectUUID + "/" + projJson[int(index)]["filename"]

    # Remote datasets are fetched directly from their URL; local ones from the upload folder
    if "http" in projJson[int(index)]["filename"]:
        df = readfile(projJson[int(index)]["filename"])
    else:
        df = readfile(outName)

    # Fall back to the first recognised text column when translated_text is missing
    if 'translated_text' not in df.columns:
        fallback_columns = ['clean_text_emotions', 'split_summary_y', 'split_summary',
                            'clean_text', 'text', 'content', 'Comment', 'summary', 'body']
        for col in fallback_columns:
            if col in df.columns:
                df['translated_text'] = df[col]
                break
        else:
            raise KeyError("No usable text column found in " + outName)


    # Strip @mention handles from the text before it is embedded or displayed
    df['translated_text'] = df['translated_text'].str.replace(r'@\w+', '', regex=True)


    if 'embeddings' not in df.columns:
        if 'encoding' in df.columns:
            # Precomputed embeddings serialised as strings (e.g. reloaded from CSV) need parsing
            if isinstance(df['encoding'].iloc[0], str):
                embeddings_array = np.vstack(df['encoding'].apply(
                    lambda x: np.fromstring(x[1:-1], sep=' ', dtype=np.float32)))
                df['embeddings'] = list(embeddings_array)
            else:
                df['embeddings'] = df['encoding']

        elif 'paragraph_embeddings' in df.columns:
            if isinstance(df['paragraph_embeddings'].iloc[0], str):
                embeddings_array = np.vstack(df['paragraph_embeddings'].apply(
                    lambda x: np.fromstring(x[1:-1], sep=' ', dtype=np.float32)))
                df['embeddings'] = list(embeddings_array)
            else:
                df['embeddings'] = df['paragraph_embeddings']

        else:
            # No embeddings shipped with the file: encode the text column in one batch
            df['embeddings'] = list(model.encode(df['translated_text'].tolist()))

        # Cache the freshly computed embeddings back to disk so the next load can reuse them
        if outName.endswith('.csv'):
            df.to_csv(outName, index=False)
        elif outName.endswith('.xlsx'):
            df.to_excel(outName, index=False)
        elif outName.endswith('.h5'):
            df.to_hdf(outName, key='df', mode='w')
    elif isinstance(df['embeddings'].iloc[0], str):
        # Embeddings column exists but was serialised to strings: parse it back into arrays
        embeddings_array = np.vstack(df['embeddings'].apply(
            lambda x: np.fromstring(x[1:-1], sep=' ', dtype=np.float32)))
        df['embeddings'] = list(embeddings_array)


    finished_loading = """<div class='ddesc'><header>
            <h1>Welcome to your Talkback environment!</h1>
            <p>Please select one of the predefined analyses on the left</p>
        </header>
        <main>
<section>
    <h2>Executive Summary</h2>
    <p>Our platform is a powerful tool that allows users to easily query and analyze their data. With intuitive visualizations and a simple user interface, even non-technical users can quickly gain insights into their data. Whether you're a small business owner or a data scientist, our platform has the features you need to make data-driven decisions.</p>
</section>

<section class='socialonly'>
    <h2>Topic cluster</h2>
    <p>Our platform now includes a powerful feature for topic clustering, allowing users to identify the main themes and topics within their data. This feature uses advanced algorithms to group similar data points together based on their content, making it easier to identify patterns and trends. With topic clustering, users can quickly gain insights into the main topics driving their data, and use this information to inform their decision-making.</p>
</section>
<section class='socialonly'>
    <h2>Sentiment Analysis</h2>
    <p>We've also recently added a sentiment analysis feature, which allows users to analyze the sentiment of their data. With this feature, users can understand the emotions and attitudes expressed within their data, and use this information to gain a deeper understanding of their customers or audience. Sentiment analysis can be particularly useful for businesses looking to understand customer feedback or opinions on their products or services. By analyzing the sentiment of customer feedback, businesses can identify areas for improvement and make data-driven decisions to improve customer satisfaction.   </p>
</section>

<section class='newsonly'>
    <h2>Trend Analysis</h2>
    <p>Our platform has been seeing steady growth in user adoption, with an increasing number of businesses turning to data analysis to inform their decision-making. We've also noticed a trend towards more demand for real-time data insights, which we plan to address with upcoming features. Additionally, we've seen a shift towards cloud-based solutions, and we're exploring ways to make our platform more flexible and scalable for our users.</p>
</section>
<section class='newsonly'>
    <h2>Competitor Analysis</h2>
    <p>We've identified several competitors in the data analysis space, including both established players and newer startups. While some of our competitors offer more advanced features, we believe our focus on ease of use and customer service sets us apart. Our platform is designed to be accessible to users of all skill levels, from beginners to experts. We also differentiate ourselves by offering a range of integrations with popular data sources, such as Google Analytics and Salesforce.</p>
</section>

<section>
    <h2>SWOT Analysis</h2>
    <p>One of the strengths of our platform is its ease of use, which allows users to quickly get up and running with their data analysis. However, we also recognize that there are areas where we can improve, such as adding more advanced features and expanding our integrations. We see a major opportunity in the growing demand for data analysis tools, especially among small and medium-sized businesses. Our biggest threat comes from larger competitors with more resources, but we believe our focus on usability and customer service will set us apart.</p>
</section>
<section>
    <h2>Talk to your data</h2>
    <p>In addition to the predefined analyses, you can use this option to interact with your data in a more natural and intuitive way.</p>
</section>
</main></div>"""

    # A project-specific intro, when provided, overrides the default welcome text
    if projJson[int(index)].get("intro"):
        finished_loading = projJson[int(index)]["intro"]
        
    formattedres = '<p style="color:black;text-align:left;">' + finished_loading + '</p>'

    # Preview of the text column (first 30 rows) shown in the UI
    tdf = df[['translated_text']][0:30]
    # tdf.rename(columns={'translated_text': 'Content'}, inplace=True)


    if "http" in outName:
        outName = projJson[int(index)]["label"] + "API"

    return df, finished_loading, tdf, formattedres, outName, '<span class="hsub">Dataset: </span><span class="selHelper">' + projJson[int(index)]["label"] + " <i>(n="+str(len(df))+")</i></span>"


def readfile(filename):
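    """Read a dataset from an Excel, CSV, or HDF5 file, or from an HTTP endpoint."""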
    if ".xlsx" in filename:
        df = pd.read_excel(filename)
    elif ".csv" in filename:
        df = pd.read_csv(filename, delimiter=',', encoding='utf-8-sig')
    elif ".h5" in filename:
        df = pd.read_hdf(filename)
    elif "http" in filename:
        # api request data
        response = requests.get(filename)
        df = pd.DataFrame(response.json())
        # HARDCODED!!!

        if "QC10" in df.columns:
            df = df[['QC10']]
            df.dropna(inplace=True)
            df.rename(columns={'QC10': 'translated_text'}, inplace=True)
        
        if "Q12" in df.columns:
            df = df[['Q12']]
            df.dropna(inplace=True)
            df.rename(columns={'Q12': 'translated_text'}, inplace=True)
            
    df.fillna('', inplace=True)
    return df
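

# Illustrative usage sketch (not part of the module); the projJson structure below is an
# assumption based on the keys accessed above, and the UUID and filename are placeholders:
#   projJson = [{"filename": "responses.csv", "label": "Survey 2023", "intro": ""}]
#   df, intro, preview, formatted, name, label = load_csv(projJson, 0, None, "example-uuid")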