File size: 17,497 Bytes
bc31df8
 
a64fd96
 
 
 
df36982
efa9b10
e798441
 
 
68bf5c0
56f5312
d7d88a6
738c47d
a1f97d1
 
efa9b10
def0304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5ddf28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def0304
 
 
 
 
f02448c
 
 
6cf0191
e6b2ec2
8e35445
e6b2ec2
 
8e35445
 
e6b2ec2
465d3ac
e6b2ec2
 
a0f7ce7
e48099b
e6b2ec2
8e35445
 
 
e6b2ec2
8e35445
e6b2ec2
 
 
6cf0191
f02448c
6cf0191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f350911
 
5df3669
cee1edf
f350911
 
cee1edf
 
2696a61
 
 
 
 
 
e0f7a1f
2696a61
 
 
 
 
 
 
 
 
799f319
3252717
2696a61
 
cee1edf
2696a61
 
 
 
 
 
efa9b10
783a6b1
d7d88a6
 
5438143
d7d88a6
1188f75
d7d88a6
 
01936b7
d7d88a6
 
 
 
 
783a6b1
efa9b10
d9f31af
efa9b10
3b26dac
8af7a53
3b26dac
7cb2e91
 
7fa6d71
7cb2e91
efa9b10
9e3ab50
a1f97d1
 
 
 
8e35445
a1f97d1
 
8e35445
a1f97d1
 
8e35445
7cb2e91
cc4d71b
b901c76
6fdd23e
efa9b10
0ec483d
16f3b53
def0304
 
7ca5761
def0304
efa9b10
 
 
b901c76
7d614a6
 
 
3b26dac
6cf0191
 
 
 
 
3b26dac
6cf0191
 
cee1edf
6cf0191
cee1edf
6cf0191
 
2b6d359
019928f
f0ce94d
f02448c
11711e5
435b599
2b6d359
 
56f5312
019928f
f0ce94d
 
56f5312
3b26dac
56f5312
da4b039
56f5312
 
 
3b26dac
56f5312
 
 
 
738c47d
 
b901c76
738c47d
 
 
 
5438143
738c47d
 
ef82d84
4c4cf13
738c47d
 
 
efa9b10
3b26dac
efa9b10
 
2696a61
 
cee1edf
2696a61
783a6b1
d7d88a6
 
 
783a6b1
738c47d
7cb2e91
7854dd4
 
1b0a7da
738c47d
56f5312
019928f
ef2a31f
008a26a
2a26969
 
8e35445
 
 
008a26a
cee1edf
07195ae
cee1edf
 
0ce495e
f350911
6cf0191
 
 
cee1edf
83736e3
738c47d
cee1edf
2696a61
418d1f5
9007f52
4c4cf13
738c47d
56f5312
 
 
 
da4b039
56f5312
f02448c
efa9b10
724556d
efa9b10
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
import subprocess

# Replace the `pdfminer` distribution with the pinned `pdfminer.six` release
# before the rest of the app is imported.
# `-y` confirms the uninstall up front: without it pip asks for confirmation,
# and nobody can answer that prompt when the app is started unattended.
subprocess.run(["pip", "uninstall", "-y", "pdfminer"])
subprocess.run(["pip", "install", "pdfminer.six==20231228"])



import gradio as gr
from scrape_3gpp import *
from excel_chat import *
from classification import *
from chart_generation import *
from charts_advanced import *
from users_management import *
from code_df_custom import *
## BUGS = 
from split_files_to_excel import *


# Categories
# Predefined classification topics. Every entry carries a "topic" label, a
# free-text "description" and the list of "experts" attached to that topic.
categories = [
{
  "topic": "Confidentiality and Privacy Protection",
  "description": "This topic covers the protection of confidentiality, privacy, and integrity in security systems. It also includes authentication and authorization processes.",
  "experts": ["Mireille"]
},
{
  "topic": "Distributed Trust and End-User Trust Models",
  "description": "This topic focuses on distributed trust models and how end-users establish trust in secure systems.",
  "experts": ["Mireille", "Khawla"]
},
{
  "topic": "Secure Element and Key Provisioning",
  "description": "This topic involves the secure element in systems and the process of key provisioning.",
  "experts": ["Mireille"]
},
{
  "topic": "Residential Gateway Security",
  "description": "This topic covers the security aspects of Residential Gateways.",
  "experts": ["Mireille"]
},
{
  "topic": "Standalone Non-Public Network (SNPN) Inter-Connection and Cybersecurity",
  "description": "This topic focuses on the inter-connection of Standalone Non-Public Networks and related cyber-security topics.",
  "experts": ["Khawla"]
},
{
  "topic": "Distributed Ledger and Blockchain in SNPN",
  "description": "This topic covers the use of distributed ledger technology and blockchain in securing Standalone Non-Public Networks.",
  "experts": ["Khawla"]
},
{
  "topic": "Distributed Networks and Communication",
  "description": "This topic involves distributed networks such as mesh networks, ad-hoc networks, and multi-hop networks, and their cyber-security aspects.",
  "experts": ["Guillaume"]
},
{
  "topic": "Swarm of Drones and Unmanned Aerial Vehicles Network Infrastructure",
  "description": "This topic covers the network infrastructure deployed by Swarm of Drones and Unmanned Aerial Vehicles.",
  "experts": ["Guillaume"]
},
{
  "topic": "USIM and Over-the-Air Services",
  "description": "This topic involves USIM and related over-the-air services such as Steering of Roaming, roaming services, network selection, and UE configuration.",
  "experts": ["Vincent"]
},
{
  "topic": "Eco-Design and Societal Impact of Technology",
  "description": "This topic covers eco-design concepts, including energy saving, energy efficiency, carbon emissions, and the societal impact of technology.",
  "experts": ["Pierre"]
},
{
  "topic": "Service Requirements of New Services",
  "description": "This topic involves defining service requirements for new services, detecting low signals of new trends and technologies, and assessing their impact on USIM services or over-the-air services.",
  "experts": ["Ly-Thanh"]
},
{
  "topic": "Satellite and Non Terrestrial Networks",
  "description": "This topic covers satellite networks, Non Terrestrial Networks, Private Networks, IoT, Inter Satellite communication, and Radio Access Network.",
  "experts": ["Nicolas"]
},
{
  "topic": "Public Safety and Emergency Communication",
  "description": "This topic involves Public Safety Communication, Military Communication, Emergency Calls, Emergency Services, Disaster Communication Access, and other related areas.",
  "experts": ["Dorin"]
},
{
    "topic": "Identifying the Human User of a Subscription",
    "description": "This topic involves methods and processes for identifying the human user associated with a subscription.",
    "experts": ["Kumar"]  # NOTE(review): the original comment said no experts were specified for this category, yet one is listed - confirm
},
{
    "topic": "Authentication and Authorization of Users and Restrictions on Users",
    "description": "This topic covers authentication and authorization processes, as well as restrictions imposed on users.",
    "experts": ["Kumar"]  # NOTE(review): the original comment said no experts were specified for this category, yet one is listed - confirm
},
{
    "topic": "Exposure of User Identity Profile Information",
    "description": "This topic involves the exposure of user identity profile information and its security implications.",
    "experts": ["Kumar"]  # NOTE(review): the original comment said no experts were specified for this category, yet one is listed - confirm
},
{
    "topic": "Identifying non-3GPP Devices Connecting behind a UE or 5G-RG",
    "description": "This topic involves identifying non-3GPP devices connecting behind a UE (User Equipment) or 5G-RG (5G Residential Gateway).",
    "experts": ["Kumar"]  # NOTE(review): the original comment said no experts were specified for this category, yet one is listed - confirm
}
]

# Tabular form of the predefined categories (columns: topic, description, experts).
# NOTE(review): `pd` is not imported explicitly in this file; presumably it is
# provided by one of the star imports above - verify.
df_cate = pd.DataFrame(categories)

# def update_label(label1):
#     return gr.update(choices=list(df.columns))

### Functions needed for Split Files

def functionCall(fi_input, dropdown, choice, chunk_size):
    """Run the splitting routine that matches the selected split mode.

    Args:
        fi_input: Uploaded file(s), forwarded unchanged to the splitter.
        dropdown: Keywords, only used by the keyword-based split.
        choice: Label of the selected split mode.
        chunk_size: Chunk size, only used by the non intelligent split.

    Returns:
        Whatever the selected splitting routine returns.
    """
    if choice == "Non intelligent split":
        return non_intelligent_split(fi_input, chunk_size)
    if choice != "Intelligent split":
        # Every other mode falls through to the keyword-based split.
        return split_by_keywords(fi_input, dropdown)
    return split_in_df(fi_input)

def change_textbox(dropdown, radio):
    """Show or hide the keyword dropdown and the chunk-size field for a split mode.

    Args:
        dropdown: Current value of the keyword dropdown. When it is empty or
            ``None`` a default keyword list is used instead.
        radio: Label of the selected split mode.

    Returns:
        A ``(gr.Dropdown, gr.Number)`` pair: the dropdown is visible only for
        "Intelligent split by keywords", the number field only for
        "Non intelligent split"; both are hidden for any other mode.
    """
    # Truthiness also covers None, for which len() would raise TypeError.
    if not dropdown:
        dropdown = ["introduction", "objective", "summary", "conclusion"]

    if radio == "Intelligent split by keywords":
        return (
            gr.Dropdown(dropdown, multiselect=True, visible=True, allow_custom_value=True),
            gr.Number(visible=False),
        )
    if radio == "Non intelligent split":
        return (
            gr.Dropdown(dropdown, visible=False),
            gr.Number(label="Chunk size", value=1000, interactive=True, visible=True),
        )
    return gr.Dropdown(dropdown, visible=False), gr.Number(visible=False)

### Split files end

### Functions needed for Classification

def addCategories(df,df_all):
    """Append the rows of ``df`` whose topic is not yet present in ``df_all``.

    Args:
        df: DataFrame holding candidate categories (topic, description, experts).
        df_all: DataFrame holding the categories collected so far.

    Returns:
        A pair ``(gr.update(...), DataFrame)``: the update carries the merged
        topic list as dropdown choices, the DataFrame is the merged table.
    """
    candidate_rows = df.to_dict("records")
    merged = df_all.to_dict("list")
    for row in candidate_rows:
        if row['topic'] in merged['topic']:
            # Topic already known: nothing to add for this row.
            continue
        for column in ('topic', 'description', 'experts'):
            merged[column].append(row[column])
        print(f"AFTER ADDINGS Those are the categories_all : {merged}")

    return gr.update(choices=merged['topic']), pd.DataFrame.from_dict(merged)
    
# Topic labels of the predefined categories, offered as filter choices.
df_cat_filter = df_cate["topic"].tolist()

def filterByTopics(filters, categories):
    """Keep only the categories whose topic is among the selected filters.

    Args:
        filters: Collection of topic labels to keep.
        categories: DataFrame of categories with a 'topic' column.

    Returns:
        An interactive ``gr.DataFrame`` labelled 'categories' that holds the
        matching rows.
    """
    kept_rows = [
        row for row in categories.to_dict("records") if row['topic'] in filters
    ]
    return gr.DataFrame(label='categories', value=pd.DataFrame(kept_rows), interactive=True)

### End
    
def reset_cate(df_categories):
    """Toggle the category table between the predefined set and one blank row.

    Args:
        df_categories: DataFrame currently shown in the category table.

    Returns:
        A one-row blank DataFrame when ``df_categories`` equals the predefined
        ``df_cate``; otherwise a copy of ``df_cate``.
    """
    if df_categories.equals(df_cate):
        # The blank table must keep the same column names as df_cate
        # ('experts', not 'expert'): addCategories reads row['experts'] from it.
        return pd.DataFrame([['', '', '']], columns=['topic', 'description', 'experts'])
    return df_cate.copy()


# Snapshot of the temp-file set seen by the previous call to
# list_attributes_and_values(); empty until that function runs once.
value = set()

def list_attributes_and_values():
    """Build an HTML page that redirects the browser to a newly created temp file.

    Compares ``fi_config.temp_files`` with the module-level ``value`` snapshot
    left by the previous call, picks one entry that is only in the new set and
    embeds it in a meta-refresh page. The snapshot is updated on every call.

    Returns:
        The HTML document as a string, or an empty string when no new entry
        appeared since the previous call.
    """
    global value
    # NOTE(review): assumes temp_files is a set (it is subtracted from one) - confirm.
    new_value = fi_config.temp_files
    print(f"value: {value}\nnew value: {new_value}")
    fresh = list(new_value - value)
    value = set(new_value)
    if not fresh:
        # Nothing new to point at; indexing the empty list would raise IndexError.
        return ""
    # When several entries are new, which one is picked is arbitrary (set order).
    tmp = fresh[0]
    html_script = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
      <meta charset="UTF-8">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
      <meta http-equiv="refresh" content="0; url=https://organizedprogrammers-standard-intelligence-dev.hf.space/file={tmp}">
      <title>Redirecting to Google</title>
    </head>
    <body>
      <p>If you are not redirected automatically, please <a href="https://organizedprogrammers-standard-intelligence-dev.hf.space/file={tmp}">click here</a>.</p>
    </body>
    </html>
    """
    return html_script


# Build the Gradio UI: components are declared first (their order defines the
# page layout), then the event handlers are wired at the end of the block.
with gr.Blocks() as demo:
    
    # Header row: app title on the left, greeting and logout button on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Extraction, Classification and AI tool")
        with gr.Column():
            md_username = gr.Markdown(value='## Hi Guest!')
            btn_logout = gr.Button("Logout")
    # Collapsed login panel. st_user holds the per-session user record and
    # starts as a "Guest" profile.
    # NOTE(review): the default hashed_password looks like the SHA-256 digest
    # of the empty string - confirm.
    with gr.Accordion(label="**Login** to keep user preferences", open=False):
        st_user = gr.State(value={"name":"Guest", "hashed_password":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", "history": { "keywords": [ "value1", "value3", "value4"], "prompts": [] }})
        with gr.Column():
            tb_user = gr.Textbox(label='Username')
            tb_pwd = gr.Textbox(label='Password', type='password')
        with gr.Row():
            btn_login = gr.Button('Login')

    # Tab 1: pick a 3GPP folder URL, optionally filter by status, then extract.
    with gr.Tab("File extraction"):
        gr.Markdown("### This part aims to extract the most relevant content and information about every contribution from a 3gpp meeting")
        gr.Markdown(" Put either just a link, or a link and an excel file with an 'Actions' column")
        with gr.Row():
            dd_url = gr.Dropdown(label="(e.g. https://www.3gpp.org/ftp/TSG_SA/WG1_Serv/TSGS1_105_Athens/Docs)", multiselect=False, value="https://www.3gpp.org/ftp/", allow_custom_value=True, scale=9)
            btn_search = gr.Button("Search")
        with gr.Accordion("Filter by file status", open=False):
            with gr.Row():
                dd_status =  gr.Dropdown(label="Status to look for (Optional)", allow_custom_value=False, multiselect=True, scale=7)
                btn_search_status = gr.Button("Search for status", scale=2)
        btn_extract = gr.Button("Extract excel from URL")


    # Tab 2: split uploaded documents. The keyword dropdown and the chunk-size
    # field start hidden; change_textbox() toggles them with the radio choice.
    with gr.Tab("Split Files"):
        gr.Markdown("### Upload your standard documentation (pdf, doc, docx) to split it into paragraphs in an Excel file")
        radio = gr.Radio(
            ["Intelligent split", "Intelligent split by keywords", "Non intelligent split"], label="Choose your selection", value = "Intelligent split"
        )
        dropdown_split = gr.Dropdown(["introduction", "objective", "conclusion", "summary"], multiselect=True, visible=False, allow_custom_value=True, label="Select or add keywords")
        nb_split = gr.Number(label="Chunk size", value=1000, interactive=True, visible=False)
        
        fi_input = gr.File(file_count='multiple')
        btn_split = gr.Button("Split")

    # Tab 3: run an LLM prompt over selected columns of the Excel file.
    with gr.Tab("Ask LLM"):
        gr.Markdown("### This section utilizes Large Language Models (LLMs) to query rows in an Excel file")
        dd_source_ask = gr.Dropdown(label="Source Column(s)", multiselect=True)
        tb_destcol = gr.Textbox(label="Destination column label (e.g. Summary, ELI5, PAB)")
        dd_prompt = gr.Dropdown(label="Prompt", allow_custom_value=True, multiselect=True, max_choices=1)
        dd_llm = gr.Dropdown(["Mistral Tiny","Mistral Small","Mistral Medium", "Claude Sonnet", "Claude Opus", "Groq (mixtral)"],value="Groq (mixtral)", label="Choose your LLM")
        with gr.Accordion("Filters", open=False):
            with gr.Row():
                dd_searchcol = gr.Dropdown(label="Column to look into (Optional)", value='[ALL]', multiselect=False, scale=4)
                dd_keywords =  gr.Dropdown(label="Words to look for (Optional)", multiselect=True, allow_custom_value=True, scale=5)
        mist_button = gr.Button("Ask AI")
            
    # Tab 4: classification. df_category is the editable table shown to the
    # user; df_category_hidden is an invisible table that also starts from
    # df_cate and is the input of the filter and "Add categories" handlers.
    with gr.Tab("Classification by topic"):
        gr.Markdown("### This section will categories each contribution in your own personalized categories")
        with gr.Row():
            dd_source_class = gr.Dropdown(label="Source Column", multiselect=False, scale=7)
            sl_treshold = gr.Slider(minimum=0, maximum=1, value=0.45, step=0.05, label='Similarity Treshold')
        gr.Markdown("### The predefined categories can be modified at any time")
        
        dd_filter = gr.Dropdown(choices=df_cat_filter, label = "Choose your filters here", multiselect=True, allow_custom_value=True)
        
        btn_filter = gr.Button("Filter")
        
        df_category = gr.DataFrame(label='categories', value=df_cate, interactive=True)
        df_category_hidden = gr.DataFrame(value=df_cate, visible=False)
        
        with gr.Row():
            btn_reset_df = gr.Button("Reset categories")
            btn_classif = gr.Button("Categorize")
            btn_add_categories = gr.Button("Add categories")


    # Tab 5: bar plot built from two user-chosen columns.
    with gr.Tab(" Personalised Charts Generation"):
        gr.Markdown("### This section will create a chart using two columns of your choice")
        with gr.Row():
            dd_label1 = gr.Dropdown(label="Label 1", multiselect=False)
            dd_label2 = gr.Dropdown(label="Label 2", value="", multiselect=False)
        btn_chart = gr.Button("Generate Bar Plot")
        plt_figure = gr.Plot()

    # Tab 6: report charts. The three plots below are shared by the nested tabs.
    with gr.Tab("Meeting Report (charts)"):
        gr.Markdown("### This section will create a report using multiple charts with your columns")
        gr.Markdown("Make sure you have an 'Expert', 'Source' and 'Status' column")
        with gr.Tab("Overall"):
            btn_overall = gr.Button("Overall Review")
        with gr.Tab("By Expert"):
            dd_exp=gr.Dropdown(label="Experts", multiselect=False, allow_custom_value=True,)
            btn_expert = gr.Button("Top 10 by expert")
        with gr.Tab("By Company"):
            tb_com=gr.Textbox(label="Company Name",info="You can write 1, 2 or 3 company names at the same time")
            btn_type = gr.Button("Company info")
        with gr.Row():
            plt_chart = gr.Plot(label="Graphique")
            plt_chart2 = gr.Plot(label="Graphique")
            plt_chart3 = gr.Plot(label="Graphique")

    # Tab 7: run user-supplied Python on the loaded file and export the result.
    with gr.Tab("Code on your file"):
        gr.Markdown("### This section lets you add your own code to add functions and filters to edit the files")
        with gr.Accordion("Input DataFrame Preview", open=False):
            df_input = gr.DataFrame(interactive=False)
        gr.Markdown("```python\ndf = pd.read_excel(YOUR_FILE)\n```")
        cd_code = gr.Code(value="# Create a copy of the original DataFrame\nnew_df = df.copy()\n\n# Add a new column to the copy\nnew_df['NewColumn'] = 'New Value'", language='python')
        gr.Markdown("```python\nnew_df.to_excel(YOUR_NEW_FILE)\nreturn YOUR_NEW_FILE\n```")

        # Created without an explicit label.
        btn_run_code = gr.Button()
        error_display = gr.Markdown()
        df_output_code = gr.DataFrame(interactive=False)
        btn_export_df = gr.Button('Export df as excel')
    # Filled by get_columns() and passed to export_df() (see wiring below).
    st_filename = gr.State()
    
    # Components shared by all tabs: a preview of the current Excel file and
    # the file itself, which most handlers take as input and/or output.
    with gr.Accordion("Excel Preview", open=False):
          df_output = gr.DataFrame()
    fi_excel = gr.File(label="Excel File")

    # ht_dl receives the HTML produced by list_attributes_and_values().
    ht_dl = gr.HTML()
    # `global` at module scope has no effect: fi_config is a module-level name
    # anyway, which is how list_attributes_and_values() can read it.
    global fi_config
    fi_config = gr.File(type='binary', visible=False)

    # authentication
    # Login (button or Enter in the password box) and logout update the same
    # four outputs.
    btn_login.click(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
    tb_pwd.submit(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
    btn_logout.click(logout, inputs=None, outputs=[st_user, md_username, dd_prompt, dd_keywords])
    
    # 3GPP scraping
    btn_search_status.click(extract_statuses, inputs=dd_url, outputs=dd_status)
    # browse_folder takes dd_url as input and writes back to dd_url, both on
    # button click and whenever the dropdown value changes.
    btn_search.click(browse_folder, inputs=dd_url, outputs=dd_url)
    dd_url.change(browse_folder, inputs=dd_url, outputs=dd_url)
    #fi_excel.change(get_expert,inputs=fi_excel, outputs=dd_exp)
    # Any change of the Excel file refreshes the column dropdowns, the previews
    # and the stored file name.
    fi_excel.change(get_columns, inputs=[fi_excel], outputs=[dd_source_ask, dd_source_class, dd_label1, dd_label2, dd_searchcol, df_output,st_filename, df_input])
    
    btn_extract.click(extractionPrincipale, inputs=[dd_url, fi_excel, dd_status], outputs=[fi_excel])


    # Split files
    #fi_input.upload(split_in_df, inputs=fi_input, outputs=fi_excel)
    # Uploading a file and clicking "Split" run the same handler with the same inputs.
    fi_input.upload(functionCall, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
    btn_split.click(functionCall, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
    radio.change(fn=change_textbox, inputs=[dropdown_split,radio], outputs=[dropdown_split, nb_split])

    #llm
    # fi_config is one of the outputs here; its change event drives the
    # download redirect wired under "json download" below.
    mist_button.click(chat_with_mistral, inputs=[dd_source_ask, tb_destcol, dd_prompt, fi_excel, dd_url, dd_searchcol, dd_keywords, dd_llm, st_user], outputs=[fi_excel, df_output, dd_prompt, dd_keywords, st_user, fi_config])

    #classification
    btn_classif.click(classification, inputs=[dd_source_class, fi_excel, df_category, sl_treshold], outputs=[fi_excel, df_output])
    btn_reset_df.click(reset_cate, inputs=df_category, outputs=df_category)
    # Filtering reads the hidden table and writes the result to the visible one.
    btn_filter.click(filterByTopics, inputs=[dd_filter, df_category_hidden], outputs=df_category)
    # Adding merges the visible table into the hidden one and refreshes the filter choices.
    btn_add_categories.click(addCategories, inputs=[df_category, df_category_hidden],outputs=[dd_filter,df_category_hidden])    
    
    #charts
    btn_chart.click(create_bar_plot, inputs=[fi_excel, dd_label1, dd_label2], outputs=[plt_figure])

    #json download
    fi_config.change(list_attributes_and_values, inputs=None, outputs=ht_dl)

    # Custom code tab
    btn_run_code.click(run_code, inputs=[fi_excel, cd_code], outputs=[df_output_code, error_display])
    btn_export_df.click(export_df, inputs=[df_output_code, st_filename], outputs=fi_excel)
    
    
    # Meeting report: "Overall Review" has three handlers, one per plot; the
    # expert and company buttons only write to the first plot.
    btn_overall.click(generate_company_chart,inputs=[fi_excel], outputs=[plt_chart])
    btn_overall.click(status_chart,inputs=[fi_excel], outputs=[plt_chart2])
    btn_overall.click(category_chart,inputs=[fi_excel], outputs=[plt_chart3])
    btn_expert.click(chart_by_expert,inputs=[fi_excel,dd_exp], outputs=[plt_chart])
    btn_type.click(company_document_type,inputs=[fi_excel,tb_com], outputs=[plt_chart])
    # dd_label1.change(update_label, inputs=[dd_label1], outputs=[dd_label2])

 
# Start the app as soon as the module is executed.
demo.launch(debug=True)