File size: 23,214 Bytes
7fa3a99
 
 
 
 
 
 
bc7b615
7fa3a99
1093a00
7fa3a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc7b615
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fa3a99
 
 
 
 
bc7b615
 
 
 
 
7fa3a99
bc7b615
7fa3a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84f637a
 
 
 
 
 
bc7b615
84f637a
 
 
 
 
 
7fa3a99
 
 
84f637a
 
 
 
 
7fa3a99
 
 
 
 
 
 
 
84f637a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fa3a99
 
 
84f637a
 
 
 
 
 
 
 
7fa3a99
 
 
84f637a
 
 
 
 
7fa3a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc7b615
 
7fa3a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc7b615
7fa3a99
 
bc7b615
 
7fa3a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.manifold import TSNE
from test import get_plot

# Every scraped Space exactly as stored in the CSV, before any filtering.
unfiltered_spaces_with_outliers = pd.read_csv('hugging_face_spaces.csv')
# Working dataset used by all plots below: only Spaces with at least 3 likes.
spaces = unfiltered_spaces_with_outliers.loc[lambda frame: frame['likes'] >= 3]
# Disabled experiments (outlier removal via z-score, quick inspection, dtype casts):
# spaces = spaces_with_outliers[(np.abs(stats.zscore(spaces_with_outliers['likes'])) < 3)]
# print(spaces.describe())
# print(spaces.describe().columns)
# print(spaces.describe().astype(str).to_numpy().tolist())
# spaces = spaces[spaces[['total_commits', 'community_interactions']].notna()]
# spaces[['total_commits', 'community_interactions']] = spaces[['total_commits', 'community_interactions']].astype('Int64')

# Human-readable description of each CSV column, in column order.
# info() attaches these positionally, so the list needs one entry per column.
descriptions = [
    # identity of the space
    'Unique id of a space, comprised of username followed by the space name separated by a "/"',
    "SDK of space, one of gradio, streamlit or static",
    "Total number of likes associated with the space",
    "Username of the user",
    "Name of the space",
    "URL associated to the space",
    # details extracted from the space itself
    "Various forms of input the space takes in",
    "Various entities the space outputs",
    "Is True if the space repo has an app.py",
    "AI/ML related python packages that were used in the making of the model",
    # repository activity
    "The date of the last known commit on the space's repository",
    "The total number of commits on the space's repository",
    "The current status of space",
    "The total number of contributions or interactions from the community on the space's repository",
]


def info(df_in, column_descriptions=None):
    """Build a ``DataFrame.info()``-style summary table with descriptions.

    Args:
        df_in: DataFrame whose columns are summarised.
        column_descriptions: One description per column of ``df_in``, in
            column order. Defaults to the module-level ``descriptions`` list.

    Returns:
        A DataFrame of strings with one row per column of ``df_in`` and the
        columns '#', 'Column', 'Non-Null Count', 'Datatype', 'Description'.

    Raises:
        ValueError: If the number of descriptions differs from the number of
            columns in ``df_in``.
    """
    if column_descriptions is None:
        column_descriptions = descriptions
    column_descriptions = list(column_descriptions)
    if len(column_descriptions) != len(df_in.columns):
        raise ValueError(
            f"expected {len(df_in.columns)} column descriptions, "
            f"got {len(column_descriptions)}"
        )

    # Indexed by column name so the per-column Series below align on assignment.
    df_info = df_in.columns.to_frame(name='Column')
    df_info['Non-Null Count'] = df_in.notna().sum()
    df_info['Datatype'] = df_in.dtypes
    # Switch to a positional index; it doubles as the '#' column.
    df_info.reset_index(drop=True, inplace=True)
    df_info['#'] = df_info.index
    df_info['Description'] = column_descriptions
    return df_info[['#', 'Column', 'Non-Null Count', 'Datatype', 'Description']].astype(str)


def get_nulls():
    """Draw a bar chart of the non-null count of every column in ``spaces``.

    Each bar is annotated with its count; the y axis is stripped because the
    annotations already carry the values.

    Returns:
        The matplotlib figure holding the chart.
    """
    column_names = spaces.columns
    non_null_counts = spaces.notnull().sum()

    fig, ax = plt.subplots(figsize=(25, 5))
    sns.barplot(x=column_names, y=non_null_counts, ax=ax, palette="tab20c_r")
    ax.set_xticklabels(column_names)
    ax.set_title(f"Non-Null values in each column ( Total rows: {len(spaces.index)} )\n", fontsize="x-large")

    # Write each count just above its bar.
    label_font = {'fontweight': 500, 'size': 12}
    for position, count in enumerate(non_null_counts):
        ax.text(position, count + 9, count,
                horizontalalignment='center', verticalalignment='bottom',
                fontdict=label_font)

    ax.set_yticklabels([])
    ax.set_yticks([False])
    sns.despine(top=True, right=True, left=True)
    return fig


def get_corr():
    """Draw an annotated heatmap of the pairwise correlations in ``spaces``.

    Only numeric and boolean columns are correlated. Selecting them explicitly
    keeps this working on pandas >= 2.0, where ``DataFrame.corr()`` no longer
    drops non-numeric columns by default and raises on the text columns.

    Returns:
        The matplotlib figure holding the heatmap.
    """
    numeric_spaces = spaces.select_dtypes(include=['number', 'bool'])
    fig = plt.figure(figsize=(10, 5))
    sns.heatmap(numeric_spaces.corr(), annot=True, linewidths=.5, fmt='.1f')
    plt.tight_layout()
    return fig


def get_corr_df():
    """Return the correlation matrix of ``spaces`` as a table of strings.

    Only numeric and boolean columns are correlated. Selecting them explicitly
    keeps this working on pandas >= 2.0, where ``DataFrame.corr()`` no longer
    drops non-numeric columns by default and raises on the text columns.

    Returns:
        A DataFrame of strings whose first column ('index') holds the row
        labels of the correlation matrix.
    """
    numeric_spaces = spaces.select_dtypes(include=['number', 'bool'])
    corr = numeric_spaces.corr().reset_index()
    return corr.astype(str)


def get_corr_scatter_total_commits():
    """Scatter ``likes`` (x) against ``total_commits`` (y) for every space.

    Returns:
        The matplotlib figure holding the scatter plot.
    """
    fig = plt.figure(figsize=(15, 5))
    axes = fig.add_subplot()
    axes.scatter(spaces['likes'], spaces['total_commits'])
    fig.tight_layout()
    return fig


def get_corr_scatter_community_interactions():
    """Scatter ``likes`` (x) against ``community_interactions`` (y) for every space.

    Returns:
        The matplotlib figure holding the scatter plot.
    """
    fig = plt.figure(figsize=(15, 5))
    axes = fig.add_subplot()
    axes.scatter(spaces['likes'], spaces['community_interactions'])
    fig.tight_layout()
    return fig


def circular_bar_plot():
    """Draw a circular (radial) bar chart from hard-coded sample data.

    Each category is one ring, drawn as a single-wedge pie whose angle is the
    category's percentage of a full circle. Rings shrink from the outer radius
    towards the inner radius, one ring width per category.

    NOTE(review): the data is sample data, not derived from ``spaces``.

    Returns:
        The matplotlib figure holding the chart.
    """
    categories = ["Electronics", "Appliances", "Books", "Music", "Clothing", "Cars", "Food/Beverages", "Personal Hygiene",
                  "Personal Health/OTC", "Hair Care"]
    percent = [81, 77, 70, 69, 69, 68, 62, 62, 61, 60]

    outer_radius = 1.5
    inner_radius = 0.4
    ring_width = (outer_radius - inner_radius) / len(percent)

    colors = plt.cm.tab10.colors

    fig = plt.figure(figsize=(10, 11))
    plt.axis("equal")

    for i, (category, value) in enumerate(zip(categories, percent)):
        radius = outer_radius - i * ring_width
        # normalize=False keeps the wedge partial (value/100 of a circle).
        # With the default normalisation a single value always fills the
        # whole ring, which would make every ring look identical.
        plt.pie([value / 100], radius=radius, startangle=90,
                counterclock=False,
                normalize=False,
                # Wrap around so more categories than colours cannot raise.
                colors=[colors[i % len(colors)]],
                labels=[f'{category}{value}%'], labeldistance=None,
                wedgeprops={'width': ring_width, 'edgecolor': 'white'})
        # Ring label, right-aligned at the 12 o'clock start of the ring.
        plt.text(0, radius - ring_width / 2, f'{category}{value}% ', ha='right', va='center')

    plt.tight_layout()
    return fig


def get_top_spaces(quantity, min_value, max_value, filter_by, sort_by):
    """Select the top spaces for the "top n spaces" section and draw its chart.

    Args:
        quantity: Maximum number of rows to return.
        min_value: Inclusive lower bound on ``spaces[filter_by]``.
        max_value: Inclusive upper bound on ``spaces[filter_by]``.
        filter_by: Name of the ``spaces`` column to filter and sort on.
        sort_by: 'ascending' sorts smallest first; any other value sorts
            largest first.

    Returns:
        A tuple ``(update, fig)``: a ``gr.Dataframe.update`` carrying the
        selected ``repo_id``/``filter_by`` rows as strings, and a matplotlib
        figure.
    """
    in_range = spaces[filter_by].between(min_value, max_value)
    top_spaces = spaces[in_range].sort_values(filter_by, ascending=(sort_by == 'ascending'))
    # A slider value may arrive as a float; positional slicing needs an int.
    top_spaces = top_spaces[['repo_id', filter_by]].iloc[:int(quantity)]

    # Build the figure without pyplot's global registry: this function runs on
    # every Submit click, and pyplot-managed figures would pile up unclosed.
    fig = plt.Figure(figsize=(10, 11))
    ax = fig.subplots()
    # NOTE: the chart does not depend on the arguments; it always shows the
    # 100 most-liked spaces, with the marker area scaled by likes.
    all_top_spaces = spaces.sort_values('likes', ascending=False)[['repo_id', 'likes', 'total_commits', 'community_interactions']].iloc[:100]
    ax.scatter(all_top_spaces['community_interactions'], all_top_spaces['total_commits'], s=all_top_spaces['likes'])
    fig.tight_layout()
    return gr.Dataframe.update(value=top_spaces.astype(str).to_numpy().tolist()), fig


def change_limits(filter_by):
    """Re-range the min/max sliders for the newly selected filter column.

    Both sliders get the column's minimum and maximum as their bounds. The
    "min" slider is reset to the column minimum and the "max" slider to the
    column maximum, so the selection covers the whole column by default
    (a fixed value could fall outside the range of the chosen column).

    Args:
        filter_by: Name of the ``spaces`` column the sliders should span.

    Returns:
        A tuple ``(min_slider_update, max_slider_update)`` of
        ``gr.Slider.update`` objects.
    """
    column = spaces[filter_by]
    lowest, highest = column.min(), column.max()
    min_slider = gr.Slider.update(minimum=lowest, maximum=highest, value=lowest)
    max_slider = gr.Slider.update(minimum=lowest, maximum=highest, value=highest)
    return min_slider, max_slider


def get_most_spaces():
    """Find the 7 users owning the most spaces and chart their space counts.

    Returns:
        A tuple ``(most_spaces, fig)``: a two-column DataFrame (user, number
        of spaces) with the 7 largest counts, and a horizontal bar chart of it.
    """
    spaces_per_user = spaces['user_name'].value_counts()
    ranked = spaces_per_user.sort_values(ascending=False).reset_index()
    most_spaces = ranked.head(7)

    users = most_spaces.iloc[:, 0]
    counts = most_spaces.iloc[:, 1]

    fig = plt.figure(figsize=(20, 10))
    plt.barh(users, counts)
    plt.xticks(rotation=30)
    plt.tight_layout()
    return most_spaces, fig


def get_most_liked_users():
    """Rank users by total likes and plot the 7 most-liked ones.

    ``sns.relplot`` is a figure-level function that creates its own figure,
    so the figure returned here is the one owned by the resulting grid
    (a separately created ``plt.figure`` would stay blank).

    Side effect: calls ``sns.set_theme()``, which changes the global plot
    style for every figure created afterwards.

    Returns:
        A tuple ``(table, fig)``: the 7 users with the most likes as a
        DataFrame of strings (columns ``user_name``, ``likes``,
        ``space_count``), and the matplotlib figure with one facet per
        distinct ``space_count``.
    """
    per_user = pd.pivot_table(
        spaces,
        index=['user_name'],
        aggfunc={'likes': np.sum, 'user_name': len}
    )
    top_users = (
        per_user.sort_values('likes', ascending=False)
        .rename(columns={'user_name': 'space_count'})
        .iloc[:7]
        .reset_index()
    )
    top_users['likes'] = top_users['likes'].astype(int)
    top_users['space_count'] = top_users['space_count'].astype(int)

    sns.set_theme()
    grid = sns.relplot(data=top_users, x='user_name', y='likes', col='space_count')
    # Older seaborn releases expose the figure as ``.fig`` only.
    fig = grid.figure if hasattr(grid, 'figure') else grid.fig
    return top_users.astype(str), fig


def pie_plot(data, figsize=(10, 5)):
    """Draw one donut chart per entry of ``data``, side by side.

    Args:
        data: Mapping whose keys are comma-separated wedge labels and whose
            values are the matching comma-separated integer counts,
            e.g. ``{"a,b": "3,5"}``.
        figsize: Size of the whole figure in inches.

    Returns:
        The matplotlib figure holding the charts.
    """
    colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99']
    chart_count = len(data)

    fig = plt.figure(figsize=figsize)
    for position, (label_csv, count_csv) in enumerate(data.items(), start=1):
        ax = fig.add_subplot(1, chart_count, position)
        sizes = [int(count) for count in count_csv.split(",")]
        ax.pie(sizes, colors=colors, labels=label_csv.split(","), startangle=90)
        # A white disc over the centre turns the pie into a donut.
        ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))
        # Equal aspect ratio keeps the pie circular.
        ax.axis('equal')
        fig.tight_layout()
    return fig


def get_sdk_proportions():
    """Draw a donut chart of how many spaces use each SDK.

    Returns:
        The matplotlib figure produced by ``pie_plot``.
    """
    sdk_counts = spaces.groupby('sdk').size().reset_index(name='counts')
    # pie_plot takes labels and counts as comma-separated strings.
    labels = ",".join(sdk_counts['sdk'].tolist())
    sizes = ",".join(sdk_counts['counts'].astype(str))
    return pie_plot({labels: sizes})


def get_sdk_frequencies():
    """Draw a jittered strip plot of ``likes`` for each SDK.

    Returns:
        The matplotlib figure holding the plot.
    """
    fig = plt.figure()
    axes = fig.add_subplot()
    sns.stripplot(x='sdk', y='likes', data=spaces, jitter=True, ax=axes)
    fig.tight_layout()
    return fig


def _io_component_counts(column, min_share_percent=2):
    """Count the comma-separated entries of ``spaces[column]``.

    Entries making up less than ``min_share_percent`` percent of all entries
    are summed into a single 'Other' entry appended at the end.
    """
    components = [
        component
        for cell in spaces[column].dropna().values
        for component in cell.split(',')
    ]
    counts = pd.Series(components).value_counts()
    is_rare = (counts / counts.sum() * 100).lt(min_share_percent)
    grouped = counts[~is_rare]
    grouped['Other'] = counts[is_rare].sum()
    return grouped


def get_io_proportions():
    """Draw two donut charts: input component usage and output component usage.

    Components used in fewer than 2% of cases are grouped under 'Other'.

    NOTE: ``pie_plot`` keys its charts by the joined label string, so if both
    charts ever had exactly the same labels in the same order, only one chart
    would be drawn.

    Returns:
        The matplotlib figure produced by ``pie_plot``.
    """
    updated_inputs = _io_component_counts('inputs')
    updated_outputs = _io_component_counts('outputs')

    return pie_plot({
        ",".join(list(updated_inputs.index.astype(str))): ",".join(updated_inputs.values.astype(str)),
        ",".join(list(updated_outputs.index.astype(str))): ",".join(updated_outputs.values.astype(str))
    })


def get_packages_proportions():
    """Draw a donut chart of how often each AI/ML package is used.

    ``ai_ml_reqs`` holds comma-separated package names per space. Packages
    making up less than 3% of all mentions are grouped under 'Other'.

    Returns:
        The matplotlib figure produced by ``pie_plot``.
    """
    packages = [
        package
        for cell in spaces['ai_ml_reqs'].dropna().values
        for package in cell.split(',')
    ]
    package_counts = pd.Series(packages).value_counts()
    is_rare = (package_counts / package_counts.sum() * 100).lt(3)
    updated_spaces_ai_reqs = package_counts[~is_rare]
    updated_spaces_ai_reqs['Other'] = package_counts[is_rare].sum()
    return pie_plot({",".join(list(updated_spaces_ai_reqs.index.astype(str))): ",".join(updated_spaces_ai_reqs.values.astype(str))})


def get_processable_spaces_proportions():
    """Draw a donut chart of the ``status`` values found in ``spaces``.

    Statuses that account for less than 5% of the spaces are summed into a
    single 'Error' wedge.

    Returns:
        The matplotlib figure produced by ``pie_plot``.
    """
    status_counts = spaces['status'].value_counts()
    share_percent = status_counts / status_counts.sum() * 100
    is_rare = share_percent.lt(5)

    kept = status_counts[~is_rare]
    kept['Error'] = status_counts[is_rare].sum()

    labels = ",".join(list(kept.index.astype(str)))
    sizes = ",".join(kept.values.astype(str))
    return pie_plot({labels: sizes})


def get_tsne():
    """Project the numeric columns of ``spaces`` to 2-D with t-SNE and plot them.

    Rows with a missing value in ``likes``, ``total_commits`` or
    ``community_interactions`` are left out. Points are coloured by SDK and
    styled by the like class from ``get_likes_description``.

    Returns:
        The matplotlib figure holding the scatter plot.
    """
    features = spaces[['likes', 'total_commits', 'community_interactions']].dropna()
    fig = plt.figure()
    embedding = TSNE(learning_rate=50).fit_transform(features)

    plot_data = features.assign(
        x=embedding[:, 0],
        y=embedding[:, 1],
        # Aligned on the row index, so only the surviving rows get a value.
        sdk=spaces['sdk'],
        like_class=features['likes'].apply(get_likes_description),
    )

    sns.scatterplot(x='x', y='y', hue='sdk', style='like_class', data=plot_data)
    return fig


def get_likes_description(likes):
    """Classify a like count relative to the distribution of ``spaces['likes']``.

    Args:
        likes: Number of likes of one space.

    Returns:
        "Below Average" when below the mean, "Good" when below the mean plus
        1.5 standard deviations, otherwise "Awesome".
    """
    average = spaces['likes'].mean()
    if likes < average:
        return "Below Average"

    awesome_threshold = average + (spaces['likes'].std() * 1.5)
    if likes < awesome_threshold:
        return "Good"
    return "Awesome"


# Page layout of the interactive blog. Every plot is rendered once while the
# page is built; only the "top n spaces" section reacts to user input.
# The element ids below (#md, #plot, #spacer, #top_spaces_plot) are styled by
# the CSS passed to gr.Blocks.
likes_min = spaces['likes'].min()
likes_max = spaces['likes'].max()

with gr.Blocks(css="""
                    #md {width: 60%; padding: 0px 10px 0px}
                    #plot {width: 40%; margin: auto;}
                    #spacer {padding: 15px 15px 15px 15px}
                    #top_spaces_plot {height: 435px}
               """) as demo:
    gr.Markdown("""
        # Exploring the statistics of 🤗 Spaces
        Hugging Face Spaces offer a simple way to host ML demo apps directly on your profile or your organization’s profile. This allows you to create your ML portfolio, showcase your projects at conferences or to stakeholders, and work collaboratively with other people in the ML ecosystem.

        This is an interactive blog that provides an overview of all the present spaces on 🤗

        **Data Set**: The above and related information was collected by DeepKlarity using the HuggingFace Spaces API Endpoint and extracting data individually from each space's repository via scraping

        The dataset can be accessed from the git repository here

        ## Read the data
        ```
        spaces = pd.read_csv('hugging_face_spaces.csv')
        ```
        ### Data columns info and descriptions:
        The following table gives insights into each of the columns in the aforementioned dataset and their respective descriptions
    """)
    gr.Dataframe(type="numpy",
                 headers=['#', 'Column', 'Non-Null Count', 'Datatype', 'Description'],
                 value=info(unfiltered_spaces_with_outliers).to_numpy().tolist(),
                 datatype=['number', 'str', 'number', 'str', 'str'],
                 row_count=14,
                 col_count=5,
                 )
    gr.Markdown(f"""
        ### Identifying valid spaces
        For the sake of keeping things simple and logical, let's assume that a space is valid only if it has a minimum of 3 likes. Keeping this in mind, the size of the new filtered dataset falls from {len(unfiltered_spaces_with_outliers.index)} to {len(spaces.index)}.\n
    """)
    gr.Dataframe(type="numpy",
                 headers=list(spaces.columns),
                 value=spaces.astype(str).iloc[:5].to_numpy().tolist(),
                 row_count=5,
                 col_count=14,
                 )
    gr.Markdown("""

        The below plot summarizes the actual amount of non-null values present in each of the columns of the new filtered dataset.
    """)
    gr.Plot(value=get_nulls())
    gr.Markdown("""
        There seem to be a lot of columns with empty/null values. Perhaps familiarity with each of the columns and what exactly it captures will help in identifying which fields are useful for the analysis and which fields can be ignored. Here are the reasons for `NaN`s in each of these columns:
        - **inputs, outputs**: An absence of any type of inputs/outputs represents a null. Moreover, the script corresponding to this dataset was able to only extract inputs/outputs for Gradio spaces. Therefore, any spaces that have Streamlit or Static as their sdk will also have a null in these columns
        - **ai_ml_reqs**: Any spaces that do not use any of the AI/ML packages will have a null in this column
        - **last_commit, total_commits, status, community_interactions**: These columns should have around the same number of nulls; a null here means that an error occurred when scraping the space's remote repository
        ### Finding correlations between characteristics
    """)
    with gr.Row():
        # with gr.Column():
        gr.Markdown("""
            Looking at the correlations and plot, we can see that no 2 columns correlate well, as they all have a Pearson's R-value of less than 0.4. However, of all the columns, the `likes` column correlates best with `total_commits` and `community_interactions`.
            Although not always true, this does make sense because well-made HuggingFace Spaces are bound to get more likes, which in turn suggests that the authors worked on them for quite a long time and therefore made a lot of commits. Secondly, a good space is also bound to get more attention, i.e. more interactions from the community.
            Below are the corresponding 2 Scatter plots
        """, elem_id='md')
        # with gr.Row():
        #     gr.Plot(value=get_corr_scatter_total_commits())
        #     gr.Plot(value=get_corr_scatter_community_interactions())
        # with gr.Column():
        gr.Plot(value=get_corr(), elem_id='plot')
    gr.Markdown("""
        ## Questions that we can try to answer
        One of the key reasons for writing this blog is to perform Exploratory Data Analysis (EDA) on the Huggingface Spaces dataset. We will accomplish this and gain insights by answering some high-level questions as follows
        ### What are the top n spaces on HuggingFace filtered by x?
    """)
    with gr.Row():
        with gr.Column():
            quantity = gr.Slider(minimum=1, maximum=10, value=10, step=1, label="Quantity:", show_label=True)
            # The initial range spans all likes, matching the initial table and plot on the right.
            min_value = gr.Slider(minimum=likes_min, maximum=likes_max, value=likes_min, step=1, label="Min Value of Quantity:", show_label=True)
            max_value = gr.Slider(minimum=likes_min, maximum=likes_max, value=likes_max, step=1, label="Max Value of Quantity:", show_label=True)
            filter_by = gr.Radio(choices=['likes', 'total_commits', 'community_interactions'], value='likes', label="Filter by:", show_label=True)
            sort_by = gr.Radio(choices=['ascending', 'descending'], value='descending', label="Sort by:", show_label=True)
            submit = gr.Button(value='Submit')
        with gr.Column():
            data_points = gr.Dataframe(
                type="numpy",
                headers=["Repo ID", 'Value'],
                datatype=["str", "number"],
                value=spaces.sort_values('likes', ascending=False)[['repo_id', 'likes']].iloc[:10].astype(str).to_numpy().tolist(),
            )
        with gr.Column():
            # data_plot = gr.Plot(value=get_plot(), elem_id='top_spaces_plot')
            data_plot = gr.Plot(value=get_top_spaces(10, 3, likes_max, 'likes', 'descending')[1], elem_id='top_spaces_plot')
    # Picking another column re-ranges both sliders; Submit refreshes table and plot.
    filter_by.change(fn=change_limits, inputs=[filter_by], outputs=[min_value, max_value])
    submit.click(fn=get_top_spaces, inputs=[quantity, min_value, max_value, filter_by, sort_by], outputs=[data_points, data_plot])
    gr.Markdown("", elem_id='spacer')
    # TODO: the Lorem ipsum paragraphs in the sections below are placeholder copy.
    with gr.Row():
        gr.Markdown("""
            ### What is the highest number of spaces created by any one user?

            Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec id rutrum diam, sed euismod lacus. Vivamus posuere, nibh sit amet dictum bibendum, tortor ligula faucibus odio, id mattis dolor erat eu ante. Quisque et velit nec libero viverra commodo in a augue. Quisque posuere leo arcu, in pretium ipsum dignissim non.

            Curabitur in purus est. Proin vitae egestas orci, sit amet elementum urna. Sed condimentum rutrum erat, in vulputate purus consectetur sit amet. Cras rutrum mattis ante id malesuada. Duis luctus, erat vel imperdiet condimentum, elit lorem tincidunt sem, sit amet maximus arcu erat at ex.
        """, elem_id='md')
        gr.Plot(value=get_most_spaces()[1], elem_id='plot')
    gr.Markdown("", elem_id='spacer')
    with gr.Row():
        gr.Plot(value=get_most_liked_users()[1], elem_id='plot')
        gr.Markdown("""
            ### Which users have achieved the most likes?

            Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec id rutrum diam, sed euismod lacus. Vivamus posuere, nibh sit amet dictum bibendum, tortor ligula faucibus odio, id mattis dolor erat eu ante. Quisque et velit nec libero viverra commodo in a augue. Quisque posuere leo arcu, in pretium ipsum dignissim non.

            Curabitur in purus est. Proin vitae egestas orci, sit amet elementum urna. Sed condimentum rutrum erat, in vulputate purus consectetur sit amet. Cras rutrum mattis ante id malesuada. Duis luctus, erat vel imperdiet condimentum, elit lorem tincidunt sem, sit amet maximus arcu erat at ex.
        """, elem_id='md')
    gr.Markdown("", elem_id='spacer')
    with gr.Row():
        gr.Markdown("""
            ### What are the proportions of the different SDKs used in creating spaces?

            Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec id rutrum diam, sed euismod lacus. Vivamus posuere, nibh sit amet dictum bibendum, tortor ligula faucibus odio, id mattis dolor erat eu ante. Quisque et velit nec libero viverra commodo in a augue. Quisque posuere leo arcu, in pretium ipsum dignissim non.

            Curabitur in purus est. Proin vitae egestas orci, sit amet elementum urna. Sed condimentum rutrum erat, in vulputate purus consectetur sit amet. Cras rutrum mattis ante id malesuada. Duis luctus, erat vel imperdiet condimentum, elit lorem tincidunt sem, sit amet maximus arcu erat at ex.
        """, elem_id='md')
        gr.Plot(value=get_sdk_proportions(), elem_id='plot')
    gr.Markdown("", elem_id='spacer')
    with gr.Row():
        gr.Plot(value=get_processable_spaces_proportions(), elem_id='plot')
        gr.Markdown("""
            ### How many spaces are processable?

            Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec id rutrum diam, sed euismod lacus. Vivamus posuere, nibh sit amet dictum bibendum, tortor ligula faucibus odio, id mattis dolor erat eu ante. Quisque et velit nec libero viverra commodo in a augue. Quisque posuere leo arcu, in pretium ipsum dignissim non.

            Curabitur in purus est. Proin vitae egestas orci, sit amet elementum urna. Sed condimentum rutrum erat, in vulputate purus consectetur sit amet. Cras rutrum mattis ante id malesuada. Duis luctus, erat vel imperdiet condimentum, elit lorem tincidunt sem, sit amet maximus arcu erat at ex.
        """, elem_id='md')
    gr.Markdown("", elem_id='spacer')
    with gr.Row():
        gr.Markdown("""
            ### What are the different types of input and output components used and which of them are used widely?

            Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec id rutrum diam, sed euismod lacus. Vivamus posuere, nibh sit amet dictum bibendum, tortor ligula faucibus odio, id mattis dolor erat eu ante. Quisque et velit nec libero viverra commodo in a augue. Quisque posuere leo arcu, in pretium ipsum dignissim non.

            Curabitur in purus est. Proin vitae egestas orci, sit amet elementum urna. Sed condimentum rutrum erat, in vulputate purus consectetur sit amet. Cras rutrum mattis ante id malesuada. Duis luctus, erat vel imperdiet condimentum, elit lorem tincidunt sem, sit amet maximus arcu erat at ex.
        """, elem_id='md')
        gr.Plot(value=get_io_proportions(), elem_id='plot')
    gr.Markdown("", elem_id='spacer')
    with gr.Row():
        gr.Plot(value=get_packages_proportions(), elem_id='plot')
        gr.Markdown("""
            ### Which AI/ML python packages are used the most?

            Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec id rutrum diam, sed euismod lacus. Vivamus posuere, nibh sit amet dictum bibendum, tortor ligula faucibus odio, id mattis dolor erat eu ante. Quisque et velit nec libero viverra commodo in a augue. Quisque posuere leo arcu, in pretium ipsum dignissim non.

            Curabitur in purus est. Proin vitae egestas orci, sit amet elementum urna. Sed condimentum rutrum erat, in vulputate purus consectetur sit amet. Cras rutrum mattis ante id malesuada. Duis luctus, erat vel imperdiet condimentum, elit lorem tincidunt sem, sit amet maximus arcu erat at ex.
        """, elem_id='md')
    gr.Markdown("", elem_id='spacer')
    gr.Markdown("""
        ## Dataset in a nutshell
    """)
    # with gr.Row():
    #     with gr.Column():
    #         gr.Plot()
    #     with gr.Column():
    #         gr.Plot()
    #     with gr.Column():
    #         gr.Plot(value=get_tsne())

# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse ``demo`` or the plotting helpers) does
# not block on a running server.
if __name__ == "__main__":
    demo.launch()