File size: 11,588 Bytes
bc31df8
 
a64fd96
 
 
 
df36982
efa9b10
e798441
 
ddce38d
e798441
68bf5c0
56f5312
d7d88a6
738c47d
ddce38d
efa9b10
2696a61
 
 
 
 
 
e0f7a1f
2696a61
 
 
 
 
 
 
 
 
799f319
3252717
2696a61
 
cee1edf
2696a61
 
 
 
 
61f14d3
 
2696a61
efa9b10
783a6b1
d7d88a6
 
5438143
d7d88a6
1188f75
d7d88a6
 
01936b7
d7d88a6
 
 
 
 
783a6b1
efa9b10
d9f31af
efa9b10
3b26dac
8af7a53
3b26dac
7cb2e91
 
7fa6d71
7cb2e91
efa9b10
9e3ab50
a1f97d1
 
 
 
8e35445
a1f97d1
 
8e35445
a1f97d1
 
8e35445
7cb2e91
cc4d71b
b901c76
6fdd23e
efa9b10
0ec483d
90b5a5b
def0304
 
7ca5761
def0304
efa9b10
a2700cf
61f14d3
efa9b10
 
b901c76
7d614a6
 
 
3b26dac
6cf0191
 
 
 
 
3b26dac
6cf0191
7f8dbdf
 
 
 
cee1edf
6cf0191
cee1edf
6cf0191
 
2b6d359
019928f
f0ce94d
f02448c
11711e5
435b599
2b6d359
 
56f5312
019928f
f0ce94d
 
56f5312
3b26dac
56f5312
da4b039
56f5312
 
 
3b26dac
56f5312
 
 
 
738c47d
 
b901c76
738c47d
 
 
 
5438143
738c47d
 
ef82d84
4c4cf13
738c47d
 
 
efa9b10
3b26dac
efa9b10
 
2696a61
 
cee1edf
2696a61
783a6b1
d7d88a6
 
 
783a6b1
738c47d
7cb2e91
7854dd4
 
1b0a7da
738c47d
56f5312
019928f
ef2a31f
008a26a
2a26969
 
7f68f7a
 
8e35445
008a26a
cee1edf
a2700cf
61f14d3
 
cee1edf
0ce495e
f350911
2eb7ac1
 
51d8bdd
6cf0191
cee1edf
83736e3
738c47d
cee1edf
2696a61
418d1f5
9007f52
4c4cf13
738c47d
56f5312
 
 
 
da4b039
56f5312
f02448c
efa9b10
724556d
efa9b10
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import subprocess

# Replace the `pdfminer` distribution with the pinned `pdfminer.six` build at
# start-up (presumably because the project modules imported below need the
# `.six` fork -- TODO confirm).
# `-y` is required: without it `pip uninstall` asks for interactive
# confirmation, which blocks (or aborts) when the app is started unattended.
subprocess.run(["pip", "uninstall", "-y", "pdfminer"])
subprocess.run(["pip", "install", "pdfminer.six==20231228"])



import gradio as gr
from scrape_3gpp import *
from excel_chat import *
from split_files_to_excel import *
from classification import *
from chart_generation import *
from charts_advanced import *
from users_management import *
from code_df_custom import *


# Paths from `fi_config.temp_files` that earlier calls have already seen; the
# function below diffs against it to find the newest path.
# (The former module-level `global value` statement was a no-op and is gone.)
value = set()


def list_attributes_and_values():
    """Build an HTML page that redirects the browser to the newest temp file.

    Compares ``fi_config.temp_files`` with the paths recorded by previous
    calls (module-level ``value``), picks one path that has not been seen
    before and embeds it in a meta-refresh redirect (plus a fallback link)
    pointing at the Space's ``/file=`` URL.

    Returns:
        str: the HTML document, or an empty string when ``temp_files``
        contains no path that is new since the previous call.
    """
    global value
    # presumably `temp_files` is the set of paths Gradio cached for the
    # hidden `fi_config` component -- verify against the Gradio version in use
    new_value = set(fi_config.temp_files)
    print(f"value: {value}\nnew value: {new_value}")
    unseen = new_value - value
    value = new_value
    if not unseen:
        # Previously `list(new_value - value)[0]` raised IndexError here
        # (e.g. when the component changed without producing a new file).
        return ""
    # When several paths are new at once, an arbitrary one is used.
    tmp = next(iter(unseen))
    html_script = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
      <meta charset="UTF-8">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
      <meta http-equiv="refresh" content="0; url=https://organizedprogrammers-standard-intelligence-dev.hf.space/file={tmp}">
      <title>Redirecting to Google</title>
    </head>
    <body>
      <p>If you are not redirected automatically, please <a href="https://organizedprogrammers-standard-intelligence-dev.hf.space/file={tmp}">click here</a>.</p>
    </body>
    </html>
    """
    return html_script

def retrieve_checkpoint():
    """Return the fixed checkpoint workbook name served to `fi_checkpoint`."""
    checkpoint_name = "checkpointfile.xlsx"
    return checkpoint_name

# Gradio UI definition: builds every component first (creation order is the
# on-screen order), then wires the event handlers, then launches the app.
# All handler functions referenced below come from the star imports at the
# top of the file, except list_attributes_and_values / retrieve_checkpoint.
with gr.Blocks() as demo:
    
    # Header row: app title in one column, greeting + logout in the other.
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Extraction, Classification and AI tool")
        with gr.Column():
            md_username = gr.Markdown(value='## Hi Guest!')
            btn_logout = gr.Button("Logout")
    # Collapsed login form; st_user carries the current user record.
    with gr.Accordion(label="**Login** to keep user preferences", open=False):
        # Default "Guest" record with sample keyword history.
        # NOTE(review): the hashed_password value is the well-known SHA-256
        # digest of the empty string -- confirm this is intended.
        st_user = gr.State(value={"name":"Guest", "hashed_password":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", "history": { "keywords": [ "value1", "value3", "value4"], "prompts": [] }})
        with gr.Column():
            tb_user = gr.Textbox(label='Username')
            tb_pwd = gr.Textbox(label='Password', type='password')
        with gr.Row():
            btn_login = gr.Button('Login')

    # --- Tab: scrape a 3GPP meeting folder into an Excel file ---
    with gr.Tab("File extraction"):
        gr.Markdown("### This part aims to extract the most relevant content and information about every contribution from a 3gpp meeting")
        gr.Markdown(" Put either just a link, or a link and an excel file with an 'Actions' column")
        with gr.Row():
            # URL picker; custom values allowed so any folder URL can be typed.
            dd_url = gr.Dropdown(label="(e.g. https://www.3gpp.org/ftp/TSG_SA/WG1_Serv/TSGS1_105_Athens/Docs)", multiselect=False, value="https://www.3gpp.org/ftp/", allow_custom_value=True, scale=9)
            btn_search = gr.Button("Search")
        with gr.Accordion("Filter by file status", open=False):
            with gr.Row():
                # Choices are filled in by extract_statuses (see wiring below).
                dd_status =  gr.Dropdown(label="Status to look for (Optional)", allow_custom_value=False, multiselect=True, scale=7)
                btn_search_status = gr.Button("Search for status", scale=2)
        btn_extract = gr.Button("Extract excel from URL")


    # --- Tab: split uploaded documents into paragraphs ---
    with gr.Tab("Split Files"):
        gr.Markdown("### Upload your standard documentation (pdf, doc, docx) to split it into paragraphs in an Excel file")
        radio = gr.Radio(
            ["Intelligent split", "Intelligent split by keywords", "Non intelligent split"], label="Choose your selection", value = "Intelligent split"
        )
        # Both start hidden; radio.change (change_textbox) updates them.
        dropdown_split = gr.Dropdown(["introduction", "objective", "conclusion", "summary"], multiselect=True, visible=False, allow_custom_value=True, label="Select or add keywords")
        nb_split = gr.Number(label="Chunk size", value=1000, interactive=True, visible=False)
        
        fi_input = gr.File(file_count='multiple')
        btn_split = gr.Button("Split")

    # --- Tab: run an LLM prompt over rows of the Excel file ---
    with gr.Tab("Ask LLM"):
        gr.Markdown("### This section utilizes Large Language Models (LLMs) to query rows in an Excel file")
        dd_source_ask = gr.Dropdown(label="Source Column(s)", multiselect=True)
        tb_destcol = gr.Textbox(label="Destination column label (e.g. Summary, ELI5, PAB)")
        # multiselect with max_choices=1: at most one prompt can be selected.
        dd_prompt = gr.Dropdown(label="Prompt", allow_custom_value=True, multiselect=True, max_choices=1)
        dd_llm = gr.Dropdown(["Mistral Tiny","Mistral Small","Mistral Medium", "Claude Sonnet", "Claude Opus", "GPT 4o" , "Groq Mixtral", "Groq Llama3 70b", "Groq Llama3 8b", "Perplexity Llama3 70b", "Perplexity Llama3 8b", "Perplexity Llama3 Sonar Small", "Perplexity Llama3 Sonar Large"],value="Groq Llama3 70b", label="Choose your LLM")
        with gr.Accordion("Filters", open=False):
            with gr.Row():
                dd_searchcol = gr.Dropdown(label="Column to look into (Optional)", value='[ALL]', multiselect=False, scale=4)
                dd_keywords =  gr.Dropdown(label="Words to look for (Optional)", multiselect=True, allow_custom_value=True, scale=5)
        mist_button = gr.Button("Ask AI")
        # Receives the checkpoint file from the LLM run or the button below.
        fi_checkpoint = gr.File()
        btn_checkpoint = gr.Button("Retrieve checkpoint file")
            
    # --- Tab: classify rows into user-editable categories ---
    with gr.Tab("Classification by topic"):
        gr.Markdown("### This section will categories each contribution in your own personalized categories")
        with gr.Row():
            dd_source_class = gr.Dropdown(label="Source Column", multiselect=False, scale=7)
            # NOTE(review): label text reads 'Treshold' (sic).
            sl_treshold = gr.Slider(minimum=0, maximum=1, value=0.45, step=0.05, label='Similarity Treshold')
        gr.Markdown("### The predefined categories can be modified at any time")
        
        # df_cat_filter / df_cate are not defined in this file; presumably
        # they come from the `classification` star import -- TODO confirm.
        dd_filter = gr.Dropdown(choices=df_cat_filter, label = "Choose your filters here", multiselect=True, allow_custom_value=True)
        
        btn_filter = gr.Button("Filter")
        
        # Editable table shown to the user, plus an invisible copy that
        # filter_by_topics reads and add_categories updates.
        df_category = gr.DataFrame(label='categories', value=df_cate, interactive=True)
        df_category_hidden = gr.DataFrame(value=df_cate, visible=False)

        with gr.Row():
            btn_dl_cate = gr.Button('Download Categories', scale=1)
            fi_categories = gr.File(visible=False, scale=9)
        with gr.Row():
            btn_reset_df = gr.Button("Reset categories")
            btn_classif = gr.Button("Categorize")
            btn_add_categories = gr.Button("Add categories")


    # --- Tab: bar plot from two chosen columns ---
    with gr.Tab(" Personalised Charts Generation"):
        gr.Markdown("### This section will create a chart using two columns of your choice")
        with gr.Row():
            dd_label1 = gr.Dropdown(label="Label 1", multiselect=False)
            dd_label2 = gr.Dropdown(label="Label 2", value="", multiselect=False)
        btn_chart = gr.Button("Generate Bar Plot")
        plt_figure = gr.Plot()

    # --- Tab: predefined meeting-report charts ---
    with gr.Tab("Meeting Report (charts)"):
        gr.Markdown("### This section will create a report using multiple charts with your columns")
        gr.Markdown("Make sure you have an 'Expert', 'Source' and 'Status' column")
        with gr.Tab("Overall"):
            btn_overall = gr.Button("Overall Review")
        with gr.Tab("By Expert"):
            dd_exp=gr.Dropdown(label="Experts", multiselect=False, allow_custom_value=True,)
            btn_expert = gr.Button("Top 10 by expert")
        with gr.Tab("By Company"):
            tb_com=gr.Textbox(label="Company Name",info="You can write 1, 2 or 3 company names at the same time")
            btn_type = gr.Button("Company info")
        # Three plot slots shared by the sub-tabs above.
        with gr.Row():
            plt_chart = gr.Plot(label="Graphique")
            plt_chart2 = gr.Plot(label="Graphique")
            plt_chart3 = gr.Plot(label="Graphique")

    # --- Tab: run user-supplied code against the loaded file ---
    with gr.Tab("Code on your file"):
        gr.Markdown("### This section lets you add your own code to add functions and filters to edit the files")
        with gr.Accordion("Input DataFrame Preview", open=False):
            df_input = gr.DataFrame(interactive=False)
        # The two Markdown snippets frame the editor to show the user what
        # surrounds their code; they are display text only.
        gr.Markdown("```python\ndf = pd.read_excel(YOUR_FILE)\n```")
        cd_code = gr.Code(value="# Create a copy of the original DataFrame\nnew_df = df.copy()\n\n# Add a new column to the copy\nnew_df['NewColumn'] = 'New Value'", language='python')
        gr.Markdown("```python\nnew_df.to_excel(YOUR_NEW_FILE)\nreturn YOUR_NEW_FILE\n```")

        btn_run_code = gr.Button()
        error_display = gr.Markdown()
        df_output_code = gr.DataFrame(interactive=False)
        btn_export_df = gr.Button('Export df as excel')
    # Written by get_columns and passed to export_df.
    st_filename = gr.State()
    
    # Components shared by all tabs: preview table and the working Excel file.
    with gr.Accordion("Excel Preview", open=False):
          df_output = gr.DataFrame()
    fi_excel = gr.File(label="Excel File")

    # Receives the HTML produced by list_attributes_and_values.
    ht_dl = gr.HTML()
    # `global` at module level is a no-op; fi_config is a module global
    # anyway and is read by list_attributes_and_values.
    global fi_config
    fi_config = gr.File(type='binary', visible=False)

    # authentication
    # Login (button or Enter in the password box) and logout all refresh the
    # user state, the greeting and the prompt/keyword dropdowns.
    btn_login.click(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
    tb_pwd.submit(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
    btn_logout.click(logout, inputs=None, outputs=[st_user, md_username, dd_prompt, dd_keywords])
    
    # 3GPP scraping
    btn_search_status.click(extract_statuses, inputs=dd_url, outputs=dd_status)
    btn_search.click(browse_folder, inputs=dd_url, outputs=dd_url)
    # NOTE(review): this handler writes to the same dropdown that triggers
    # it -- confirm it cannot re-trigger itself in a loop.
    dd_url.change(browse_folder, inputs=dd_url, outputs=dd_url)
    #fi_excel.change(get_expert,inputs=fi_excel, outputs=dd_exp)
    # Whenever the working Excel file changes, refresh every column dropdown,
    # the previews and the stored file name.
    fi_excel.change(get_columns, inputs=[fi_excel], outputs=[dd_source_ask, dd_source_class, dd_label1, dd_label2, dd_searchcol, df_output,st_filename, df_input])
    
    btn_extract.click(extractionPrincipale, inputs=[dd_url, fi_excel, dd_status], outputs=[fi_excel])


    # Split files
    #fi_input.upload(split_in_df, inputs=fi_input, outputs=fi_excel)
    # The same handler runs on upload and on the "Split" button.
    fi_input.upload(function_split_call, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
    btn_split.click(function_split_call, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
    radio.change(fn=change_textbox, inputs=[dropdown_split,radio], outputs=[dropdown_split, nb_split])

    #llm
    # fi_config is among the outputs; its change event (wired further down)
    # then fills ht_dl.
    mist_button.click(chat_with_mistral, inputs=[dd_source_ask, tb_destcol, dd_prompt, fi_excel, dd_url, dd_searchcol, dd_keywords, dd_llm, st_user], outputs=[fi_excel, df_output, dd_prompt, dd_keywords, st_user, fi_config,fi_checkpoint])
    btn_checkpoint.click(retrieve_checkpoint, outputs=fi_checkpoint)
    
    #classification
    btn_classif.click(classification, inputs=[dd_source_class, fi_excel, df_category, sl_treshold], outputs=[fi_excel, df_output])
    btn_reset_df.click(reset_cate, inputs=df_category, outputs=df_category)
    btn_filter.click(filter_by_topics, inputs=[dd_filter, df_category_hidden], outputs=df_category)
    btn_add_categories.click(add_categories, inputs=[df_category, df_category_hidden],outputs=[dd_filter,df_category_hidden])    
    btn_dl_cate.click(download_cate, inputs=df_category, outputs=fi_categories)
    
    #charts
    btn_chart.click(create_bar_plot, inputs=[fi_excel, dd_label1, dd_label2], outputs=[plt_figure])

    #json download
    # The handler takes no inputs; it reads fi_config directly as a global.
    fi_config.change(list_attributes_and_values, inputs=None, outputs=ht_dl)

    # Custom code tab: run the editor content, then optionally export.
    btn_run_code.click(run_code, inputs=[fi_excel, cd_code], outputs=[df_output_code, error_display])
    btn_export_df.click(export_df, inputs=[df_output_code, st_filename], outputs=fi_excel)
    
    
    # Meeting report: "Overall Review" registers three handlers, one per plot
    # slot; the expert and company buttons both draw into the first slot.
    btn_overall.click(generate_company_chart,inputs=[fi_excel], outputs=[plt_chart])
    btn_overall.click(status_chart,inputs=[fi_excel], outputs=[plt_chart2])
    btn_overall.click(category_chart,inputs=[fi_excel], outputs=[plt_chart3])
    btn_expert.click(chart_by_expert,inputs=[fi_excel,dd_exp], outputs=[plt_chart])
    btn_type.click(company_document_type,inputs=[fi_excel,tb_com], outputs=[plt_chart])
    # dd_label1.change(update_label, inputs=[dd_label1], outputs=[dd_label2])

 
# Start the app with Gradio's debug flag enabled.
demo.launch(debug=True)