heymenn's picture
Update app.py
df36982 verified
raw history blame
No virus
17.5 kB
import subprocess
# Swap the legacy "pdfminer" distribution for a pinned "pdfminer.six" before the
# project modules below are imported (presumably because both distributions
# provide the same importable package -- confirm).
# "-y" answers pip's confirmation prompt up front: without it the uninstall
# waits for input on stdin, which cannot be answered when the app is started
# unattended, so the legacy package would stay installed.
subprocess.run(["pip", "uninstall", "-y", "pdfminer"])
subprocess.run(["pip", "install", "pdfminer.six==20231228"])
import gradio as gr
from scrape_3gpp import *
from excel_chat import *
from classification import *
from chart_generation import *
from charts_advanced import *
from users_management import *
from code_df_custom import *
## BUGS =
from split_files_to_excel import *
# Predefined classification categories. Each entry carries a "topic", a
# free-text "description" and the list of "experts" attached to that topic.
categories = [
    {
        "topic": "Confidentiality and Privacy Protection",
        "description": "This topic covers the protection of confidentiality, privacy, and integrity in security systems. It also includes authentication and authorization processes.",
        "experts": ["Mireille"]
    },
    {
        "topic": "Distributed Trust and End-User Trust Models",
        "description": "This topic focuses on distributed trust models and how end-users establish trust in secure systems.",
        "experts": ["Mireille", "Khawla"]
    },
    {
        "topic": "Secure Element and Key Provisioning",
        "description": "This topic involves the secure element in systems and the process of key provisioning.",
        "experts": ["Mireille"]
    },
    {
        "topic": "Residential Gateway Security",
        "description": "This topic covers the security aspects of Residential Gateways.",
        "experts": ["Mireille"]
    },
    {
        "topic": "Standalone Non-Public Network (SNPN) Inter-Connection and Cybersecurity",
        "description": "This topic focuses on the inter-connection of Standalone Non-Public Networks and related cyber-security topics.",
        "experts": ["Khawla"]
    },
    {
        "topic": "Distributed Ledger and Blockchain in SNPN",
        "description": "This topic covers the use of distributed ledger technology and blockchain in securing Standalone Non-Public Networks.",
        "experts": ["Khawla"]
    },
    {
        "topic": "Distributed Networks and Communication",
        "description": "This topic involves distributed networks such as mesh networks, ad-hoc networks, and multi-hop networks, and their cyber-security aspects.",
        "experts": ["Guillaume"]
    },
    {
        "topic": "Swarm of Drones and Unmanned Aerial Vehicles Network Infrastructure",
        "description": "This topic covers the network infrastructure deployed by Swarm of Drones and Unmanned Aerial Vehicles.",
        "experts": ["Guillaume"]
    },
    {
        "topic": "USIM and Over-the-Air Services",
        "description": "This topic involves USIM and related over-the-air services such as Steering of Roaming, roaming services, network selection, and UE configuration.",
        "experts": ["Vincent"]
    },
    {
        "topic": "Eco-Design and Societal Impact of Technology",
        "description": "This topic covers eco-design concepts, including energy saving, energy efficiency, carbon emissions, and the societal impact of technology.",
        "experts": ["Pierre"]
    },
    {
        "topic": "Service Requirements of New Services",
        "description": "This topic involves defining service requirements for new services, detecting low signals of new trends and technologies, and assessing their impact on USIM services or over-the-air services.",
        "experts": ["Ly-Thanh"]
    },
    {
        "topic": "Satellite and Non Terrestrial Networks",
        "description": "This topic covers satellite networks, Non Terrestrial Networks, Private Networks, IoT, Inter Satellite communication, and Radio Access Network.",
        "experts": ["Nicolas"]
    },
    {
        "topic": "Public Safety and Emergency Communication",
        "description": "This topic involves Public Safety Communication, Military Communication, Emergency Calls, Emergency Services, Disaster Communication Access, and other related areas.",
        "experts": ["Dorin"]
    },
    {
        "topic": "Identifying the Human User of a Subscription",
        "description": "This topic involves methods and processes for identifying the human user associated with a subscription.",
        "experts": ["Kumar"]  # NOTE(review): earlier note said the experts for this category are not specified, yet "Kumar" is listed -- confirm
    },
    {
        "topic": "Authentication and Authorization of Users and Restrictions on Users",
        "description": "This topic covers authentication and authorization processes, as well as restrictions imposed on users.",
        "experts": ["Kumar"]  # NOTE(review): earlier note said the experts for this category are not specified, yet "Kumar" is listed -- confirm
    },
    {
        "topic": "Exposure of User Identity Profile Information",
        "description": "This topic involves the exposure of user identity profile information and its security implications.",
        "experts": ["Kumar"]  # NOTE(review): earlier note said the experts for this category are not specified, yet "Kumar" is listed -- confirm
    },
    {
        "topic": "Identifying non-3GPP Devices Connecting behind a UE or 5G-RG",
        "description": "This topic involves identifying non-3GPP devices connecting behind a UE (User Equipment) or 5G-RG (5G Residential Gateway).",
        "experts": ["Kumar"]  # NOTE(review): earlier note said the experts for this category are not specified, yet "Kumar" is listed -- confirm
    }
]
# Tabular form of the defaults (columns: topic, description, experts); it seeds
# the category tables in the UI and is the reference that reset_cate() compares
# against and restores.
df_cate = pd.DataFrame(categories)
# def update_label(label1):
# return gr.update(choices=list(df.columns))
### Functions needed for Split Files
def functionCall(fi_input, dropdown, choice, chunk_size):
    """Run the splitter that matches the selected split mode.

    Args:
        fi_input: Uploaded file(s), forwarded untouched to the splitter.
        dropdown: Keywords, used only by the keyword-based split.
        choice: Label of the selected split mode.
        chunk_size: Chunk size, used only by the non intelligent split.

    Returns:
        Whatever the selected splitter returns.
    """
    if choice == "Non intelligent split":
        return non_intelligent_split(fi_input, chunk_size)
    if choice == "Intelligent split":
        return split_in_df(fi_input)
    # Every other label (in the UI: "Intelligent split by keywords") falls
    # through to the keyword-based splitter.
    return split_by_keywords(fi_input, dropdown)
def change_textbox(dropdown, radio):
    """Show only the extra inputs that the selected split mode needs.

    Args:
        dropdown: Current keyword selection; it is passed on as the first
            positional argument of the returned dropdown. An empty or missing
            selection is replaced by a default keyword list.
        radio: Label of the selected split mode.

    Returns:
        A ``(gr.Dropdown, gr.Number)`` pair: the keyword dropdown is visible
        only for the keyword split, the chunk-size field only for the non
        intelligent split.
    """
    # Truthiness instead of len(): a None selection would make len() raise.
    if not dropdown:
        dropdown = ["introduction", "objective", "summary", "conclusion"]
    if radio == "Intelligent split by keywords":
        return gr.Dropdown(dropdown, multiselect=True, visible=True, allow_custom_value=True), gr.Number(visible=False)
    if radio == "Non intelligent split":
        return gr.Dropdown(dropdown, visible=False), gr.Number(label="Chunk size", value=1000, interactive=True, visible=True)
    # Plain "Intelligent split" (or any other label): hide both extra inputs.
    return gr.Dropdown(dropdown, visible=False), gr.Number(visible=False)
### Split files end
### Functions needed for Classification
def addCategories(df, df_all):
    """Merge the rows of ``df`` whose topic is new into ``df_all``.

    Args:
        df: DataFrame of candidate categories; rows are read through their
            'topic', 'description' and 'experts' keys.
        df_all: DataFrame holding every category known so far.

    Returns:
        A ``(gr.update, DataFrame)`` pair: an update carrying the merged topic
        list as ``choices``, and the merged categories as a new DataFrame.
    """
    incoming = df.to_dict("records")
    merged = df_all.to_dict("list")
    for row in incoming:
        # Topics already present are left untouched.
        if row['topic'] in merged['topic']:
            continue
        for column in ('topic', 'description', 'experts'):
            merged[column].append(row[column])
    print(f"AFTER ADDINGS Those are the categories_all : {merged}")
    return gr.update(choices=merged['topic']), pd.DataFrame.from_dict(merged)
# Topics of the predefined categories, as a plain list.
df_cat_filter = df_cate["topic"].tolist()
def filterByTopics(filters, categories):
    """Build a categories table restricted to the selected topics.

    Args:
        filters: Collection of topic labels to keep.
        categories: DataFrame of categories with a 'topic' column.

    Returns:
        An interactive ``gr.DataFrame`` labelled 'categories' that holds only
        the rows whose topic is in ``filters``, in their original order.
    """
    selected = [row for row in categories.to_dict("records") if row['topic'] in filters]
    return gr.DataFrame(label='categories', value=pd.DataFrame(selected), interactive=True)
### End
def reset_cate(df_categories):
    """Toggle the categories table between the defaults and a single blank row.

    Args:
        df_categories: DataFrame currently shown in the categories table.

    Returns:
        A one-row DataFrame of empty strings when the table still equals the
        predefined categories, otherwise a fresh copy of the predefined
        categories.
    """
    if df_categories.equals(df_cate):
        # The blank table must use the same column names as the defaults
        # ('experts', not 'expert'); addCategories() reads rows by those keys.
        return pd.DataFrame([['', '', '']], columns=['topic', 'description', 'experts'])
    return df_cate.copy()
# Entries of fi_config.temp_files already seen by list_attributes_and_values().
value = set()
def list_attributes_and_values():
    """Build an HTML page that redirects the browser to a newly created temp file.

    Compares ``fi_config.temp_files`` with the entries recorded in the
    module-level ``value`` set, records the current entries, and points the
    redirect at one entry that was not seen before.

    Returns:
        The HTML document as a string, or an empty string when no new entry
        appeared since the previous call (indexing the empty difference would
        otherwise raise IndexError).
    """
    global value
    new_value = set(fi_config.temp_files)
    print(f"value: {value}\nnew value: {new_value}")
    fresh = new_value - value
    value = new_value
    if not fresh:
        return ""
    tmp = next(iter(fresh))
    html_script = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="refresh" content="0; url=https://organizedprogrammers-standard-intelligence-dev.hf.space/file={tmp}">
<title>Redirecting to Google</title>
</head>
<body>
<p>If you are not redirected automatically, please <a href="https://organizedprogrammers-standard-intelligence-dev.hf.space/file={tmp}">click here</a>.</p>
</body>
</html>
"""
    return html_script
# Gradio UI: component layout first, then the event wiring, then the launch.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# copy of this file that had lost its indentation -- confirm it against the
# deployed layout.
with gr.Blocks() as demo:
    # Header: application title next to the greeting and the logout button.
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Extraction, Classification and AI tool")
        with gr.Column():
            md_username = gr.Markdown(value='## Hi Guest!')
            btn_logout = gr.Button("Logout")
    with gr.Accordion(label="**Login** to keep user preferences", open=False):
        # Session state for the current user; starts as a "Guest" profile.
        # NOTE(review): the default hash looks like SHA-256 of the empty string -- confirm against users_management.
        st_user = gr.State(value={"name":"Guest", "hashed_password":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", "history": { "keywords": [ "value1", "value3", "value4"], "prompts": [] }})
        with gr.Column():
            tb_user = gr.Textbox(label='Username')
            tb_pwd = gr.Textbox(label='Password', type='password')
        with gr.Row():
            btn_login = gr.Button('Login')
    with gr.Tab("File extraction"):
        gr.Markdown("### This part aims to extract the most relevant content and information about every contribution from a 3gpp meeting")
        gr.Markdown(" Put either just a link, or a link and an excel file with an 'Actions' column")
        with gr.Row():
            dd_url = gr.Dropdown(label="(e.g. https://www.3gpp.org/ftp/TSG_SA/WG1_Serv/TSGS1_105_Athens/Docs)", multiselect=False, value="https://www.3gpp.org/ftp/", allow_custom_value=True, scale=9)
            btn_search = gr.Button("Search")
        with gr.Accordion("Filter by file status", open=False):
            with gr.Row():
                dd_status = gr.Dropdown(label="Status to look for (Optional)", allow_custom_value=False, multiselect=True, scale=7)
                btn_search_status = gr.Button("Search for status", scale=2)
        btn_extract = gr.Button("Extract excel from URL")
    with gr.Tab("Split Files"):
        gr.Markdown("### Upload your standard documentation (pdf, doc, docx) to split it into paragraphs in an Excel file")
        radio = gr.Radio(
            ["Intelligent split", "Intelligent split by keywords", "Non intelligent split"], label="Choose your selection", value = "Intelligent split"
        )
        # Both extra inputs start hidden; change_textbox() toggles their visibility.
        dropdown_split = gr.Dropdown(["introduction", "objective", "conclusion", "summary"], multiselect=True, visible=False, allow_custom_value=True, label="Select or add keywords")
        nb_split = gr.Number(label="Chunk size", value=1000, interactive=True, visible=False)
        fi_input = gr.File(file_count='multiple')
        btn_split = gr.Button("Split")
    with gr.Tab("Ask LLM"):
        gr.Markdown("### This section utilizes Large Language Models (LLMs) to query rows in an Excel file")
        dd_source_ask = gr.Dropdown(label="Source Column(s)", multiselect=True)
        tb_destcol = gr.Textbox(label="Destination column label (e.g. Summary, ELI5, PAB)")
        dd_prompt = gr.Dropdown(label="Prompt", allow_custom_value=True, multiselect=True, max_choices=1)
        dd_llm = gr.Dropdown(["Mistral Tiny","Mistral Small","Mistral Medium", "Claude Sonnet", "Claude Opus", "Groq (mixtral)"],value="Groq (mixtral)", label="Choose your LLM")
        with gr.Accordion("Filters", open=False):
            with gr.Row():
                dd_searchcol = gr.Dropdown(label="Column to look into (Optional)", value='[ALL]', multiselect=False, scale=4)
                dd_keywords = gr.Dropdown(label="Words to look for (Optional)", multiselect=True, allow_custom_value=True, scale=5)
        mist_button = gr.Button("Ask AI")
    with gr.Tab("Classification by topic"):
        gr.Markdown("### This section will categories each contribution in your own personalized categories")
        with gr.Row():
            dd_source_class = gr.Dropdown(label="Source Column", multiselect=False, scale=7)
            sl_treshold = gr.Slider(minimum=0, maximum=1, value=0.45, step=0.05, label='Similarity Treshold')
        gr.Markdown("### The predefined categories can be modified at any time")
        dd_filter = gr.Dropdown(choices=df_cat_filter, label = "Choose your filters here", multiselect=True, allow_custom_value=True)
        btn_filter = gr.Button("Filter")
        # df_category is the editable table; df_category_hidden is an invisible
        # table that the filter and "Add categories" handlers read and update.
        df_category = gr.DataFrame(label='categories', value=df_cate, interactive=True)
        df_category_hidden = gr.DataFrame(value=df_cate, visible=False)
        with gr.Row():
            btn_reset_df = gr.Button("Reset categories")
            btn_classif = gr.Button("Categorize")
            btn_add_categories = gr.Button("Add categories")
    with gr.Tab(" Personalised Charts Generation"):
        gr.Markdown("### This section will create a chart using two columns of your choice")
        with gr.Row():
            dd_label1 = gr.Dropdown(label="Label 1", multiselect=False)
            dd_label2 = gr.Dropdown(label="Label 2", value="", multiselect=False)
        btn_chart = gr.Button("Generate Bar Plot")
        plt_figure = gr.Plot()
    with gr.Tab("Meeting Report (charts)"):
        gr.Markdown("### This section will create a report using multiple charts with your columns")
        gr.Markdown("Make sure you have an 'Expert', 'Source' and 'Status' column")
        with gr.Tab("Overall"):
            btn_overall = gr.Button("Overall Review")
        with gr.Tab("By Expert"):
            dd_exp=gr.Dropdown(label="Experts", multiselect=False, allow_custom_value=True,)
            btn_expert = gr.Button("Top 10 by expert")
        with gr.Tab("By Company"):
            tb_com=gr.Textbox(label="Company Name",info="You can write 1, 2 or 3 company names at the same time")
            btn_type = gr.Button("Company info")
        # Three plot slots used by the report buttons above.
        with gr.Row():
            plt_chart = gr.Plot(label="Graphique")
            plt_chart2 = gr.Plot(label="Graphique")
            plt_chart3 = gr.Plot(label="Graphique")
    with gr.Tab("Code on your file"):
        gr.Markdown("### This section lets you add your own code to add functions and filters to edit the files")
        with gr.Accordion("Input DataFrame Preview", open=False):
            df_input = gr.DataFrame(interactive=False)
        gr.Markdown("```python\ndf = pd.read_excel(YOUR_FILE)\n```")
        cd_code = gr.Code(value="# Create a copy of the original DataFrame\nnew_df = df.copy()\n\n# Add a new column to the copy\nnew_df['NewColumn'] = 'New Value'", language='python')
        gr.Markdown("```python\nnew_df.to_excel(YOUR_NEW_FILE)\nreturn YOUR_NEW_FILE\n```")
        btn_run_code = gr.Button()
        error_display = gr.Markdown()
        df_output_code = gr.DataFrame(interactive=False)
        btn_export_df = gr.Button('Export df as excel')
        st_filename = gr.State()
    with gr.Accordion("Excel Preview", open=False):
        df_output = gr.DataFrame()
    # The Excel file component is an input and/or output of most handlers below.
    fi_excel = gr.File(label="Excel File")
    # Receives the HTML produced by list_attributes_and_values().
    ht_dl = gr.HTML()
    # Hidden file component: it is an output of chat_with_mistral, and its
    # change event triggers list_attributes_and_values(), which reads it as a
    # module-level name. (`global` is a no-op at module level.)
    global fi_config
    fi_config = gr.File(type='binary', visible=False)
    # authentication
    # The Login button and submitting the password box run the same handler.
    btn_login.click(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
    tb_pwd.submit(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
    btn_logout.click(logout, inputs=None, outputs=[st_user, md_username, dd_prompt, dd_keywords])
    # 3GPP scraping
    btn_search_status.click(extract_statuses, inputs=dd_url, outputs=dd_status)
    btn_search.click(browse_folder, inputs=dd_url, outputs=dd_url)
    dd_url.change(browse_folder, inputs=dd_url, outputs=dd_url)
    #fi_excel.change(get_expert,inputs=fi_excel, outputs=dd_exp)
    # Whenever the Excel file changes, get_columns updates the column
    # dropdowns, both previews and the stored filename.
    fi_excel.change(get_columns, inputs=[fi_excel], outputs=[dd_source_ask, dd_source_class, dd_label1, dd_label2, dd_searchcol, df_output,st_filename, df_input])
    btn_extract.click(extractionPrincipale, inputs=[dd_url, fi_excel, dd_status], outputs=[fi_excel])
    # Split files
    #fi_input.upload(split_in_df, inputs=fi_input, outputs=fi_excel)
    # Splitting runs both on upload and on the Split button.
    fi_input.upload(functionCall, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
    btn_split.click(functionCall, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
    radio.change(fn=change_textbox, inputs=[dropdown_split,radio], outputs=[dropdown_split, nb_split])
    #llm
    mist_button.click(chat_with_mistral, inputs=[dd_source_ask, tb_destcol, dd_prompt, fi_excel, dd_url, dd_searchcol, dd_keywords, dd_llm, st_user], outputs=[fi_excel, df_output, dd_prompt, dd_keywords, st_user, fi_config])
    #classification
    btn_classif.click(classification, inputs=[dd_source_class, fi_excel, df_category, sl_treshold], outputs=[fi_excel, df_output])
    btn_reset_df.click(reset_cate, inputs=df_category, outputs=df_category)
    btn_filter.click(filterByTopics, inputs=[dd_filter, df_category_hidden], outputs=df_category)
    btn_add_categories.click(addCategories, inputs=[df_category, df_category_hidden],outputs=[dd_filter,df_category_hidden])
    #charts
    btn_chart.click(create_bar_plot, inputs=[fi_excel, dd_label1, dd_label2], outputs=[plt_figure])
    #json download
    fi_config.change(list_attributes_and_values, inputs=None, outputs=ht_dl)
    # Custom code tab: run the user's code, then export the resulting table.
    btn_run_code.click(run_code, inputs=[fi_excel, cd_code], outputs=[df_output_code, error_display])
    btn_export_df.click(export_df, inputs=[df_output_code, st_filename], outputs=fi_excel)
    # "Overall Review" has three handlers, one per plot slot.
    btn_overall.click(generate_company_chart,inputs=[fi_excel], outputs=[plt_chart])
    btn_overall.click(status_chart,inputs=[fi_excel], outputs=[plt_chart2])
    btn_overall.click(category_chart,inputs=[fi_excel], outputs=[plt_chart3])
    # The expert and company charts both render into the first plot slot.
    btn_expert.click(chart_by_expert,inputs=[fi_excel,dd_exp], outputs=[plt_chart])
    btn_type.click(company_document_type,inputs=[fi_excel,tb_com], outputs=[plt_chart])
    # dd_label1.change(update_label, inputs=[dd_label1], outputs=[dd_label2])
demo.launch(debug=True)