Standard_Intelligence_Dev

Sleeping

File size: 8,392 Bytes

import pandas as pd
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer, util
import numpy as np
import torch
import gradio as gr


# Default expertise categories.
# Each entry is a dict with three keys:
#   "topic"       - short name of the category
#   "description" - free-text description of what the category covers
#   "experts"     - list of names of the experts attached to the category
categories = [
{
  "topic": "Confidentiality and Privacy Protection",
  "description": "This topic covers the protection of confidentiality, privacy, and integrity in security systems. It also includes authentication and authorization processes.",
  "experts": ["Mireille"]
},
{
  "topic": "Distributed Trust and End-User Trust Models",
  "description": "This topic focuses on distributed trust models and how end-users establish trust in secure systems.",
  "experts": ["Mireille", "Khawla"]
},
{
  "topic": "Secure Element and Key Provisioning",
  "description": "This topic involves the secure element in systems and the process of key provisioning.",
  "experts": ["Mireille"]
},
{
  "topic": "Residential Gateway Security",
  "description": "This topic covers the security aspects of Residential Gateways.",
  "experts": ["Mireille"]
},
{
  "topic": "Standalone Non-Public Network (SNPN) Inter-Connection and Cybersecurity",
  "description": "This topic focuses on the inter-connection of Standalone Non-Public Networks and related cyber-security topics.",
  "experts": ["Khawla"]
},
{
  "topic": "Distributed Ledger and Blockchain in SNPN",
  "description": "This topic covers the use of distributed ledger technology and blockchain in securing Standalone Non-Public Networks.",
  "experts": ["Khawla"]
},
{
  "topic": "Distributed Networks and Communication",
  "description": "This topic involves distributed networks such as mesh networks, ad-hoc networks, and multi-hop networks, and their cyber-security aspects.",
  "experts": ["Guillaume"]
},
{
  "topic": "Swarm of Drones and Unmanned Aerial Vehicles Network Infrastructure",
  "description": "This topic covers the network infrastructure deployed by Swarm of Drones and Unmanned Aerial Vehicles.",
  "experts": ["Guillaume"]
},
{
  "topic": "USIM and Over-the-Air Services",
  "description": "This topic involves USIM and related over-the-air services such as Steering of Roaming, roaming services, network selection, and UE configuration.",
  "experts": ["Vincent"]
},
{
  "topic": "Eco-Design and Societal Impact of Technology",
  "description": "This topic covers eco-design concepts, including energy saving, energy efficiency, carbon emissions, and the societal impact of technology.",
  "experts": ["Pierre"]
},
{
  "topic": "Service Requirements of New Services",
  "description": "This topic involves defining service requirements for new services, detecting low signals of new trends and technologies, and assessing their impact on USIM services or over-the-air services.",
  "experts": ["Ly-Thanh"]
},
{
  "topic": "Satellite and Non Terrestrial Networks",
  "description": "This topic covers satellite networks, Non Terrestrial Networks, Private Networks, IoT, Inter Satellite communication, and Radio Access Network.",
  "experts": ["Nicolas"]
},
{
  "topic": "Public Safety and Emergency Communication",
  "description": "This topic involves Public Safety Communication, Military Communication, Emergency Calls, Emergency Services, Disaster Communication Access, and other related areas.",
  "experts": ["Dorin"]
},
{
    "topic": "Identifying the Human User of a Subscription",
    "description": "This topic involves methods and processes for identifying the human user associated with a subscription.",
    "experts": ["Kumar"]  # NOTE(review): originally marked "experts not specified" for this category, yet "Kumar" is listed - confirm
},
{
    "topic": "Authentication and Authorization of Users and Restrictions on Users",
    "description": "This topic covers authentication and authorization processes, as well as restrictions imposed on users.",
    "experts": ["Kumar"]  # NOTE(review): originally marked "experts not specified" for this category, yet "Kumar" is listed - confirm
},
{
    "topic": "Exposure of User Identity Profile Information",
    "description": "This topic involves the exposure of user identity profile information and its security implications.",
    "experts": ["Kumar"]  # NOTE(review): originally marked "experts not specified" for this category, yet "Kumar" is listed - confirm
},
{
    "topic": "Identifying non-3GPP Devices Connecting behind a UE or 5G-RG",
    "description": "This topic involves identifying non-3GPP devices connecting behind a UE (User Equipment) or 5G-RG (5G Residential Gateway).",
    "experts": ["Kumar"]  # NOTE(review): originally marked "experts not specified" for this category, yet "Kumar" is listed - confirm
}
]

def add_categories(df, df_all):
    """Append to ``df_all`` every row of ``df`` whose topic is not already there.

    Both frames are read through their ``topic``, ``description`` and
    ``experts`` columns. Each time a row is added, the full merged content is
    printed.

    Args:
        df: DataFrame holding the candidate categories.
        df_all: DataFrame holding the categories collected so far.

    Returns:
        A tuple ``(update, merged_df)``: a ``gr.update`` whose ``choices`` are
        the merged topic names, and a DataFrame rebuilt from the merged columns.
    """
    merged = df_all.to_dict("list")
    for entry in df.to_dict("records"):
        # Topics already present (including ones added earlier in this loop) are skipped.
        if entry['topic'] in merged['topic']:
            continue
        for field in ('topic', 'description', 'experts'):
            merged[field].append(entry[field])
        print(f"AFTER ADDINGS Those are the categories_all : {merged}")

    return gr.update(choices=merged['topic']), pd.DataFrame.from_dict(merged)

# Default category table built from the module-level ``categories`` list,
# and the plain list of its topic names.
df_cate = pd.DataFrame(categories)
df_cat_filter = df_cate["topic"].tolist()

def filter_by_topics(filters, categories):
    """Build a Gradio table holding only the categories whose topic is selected.

    Args:
        filters: Container of topic names to keep (tested with ``in``).
        categories: DataFrame of categories; must have a ``topic`` column
            whenever it has at least one row.

    Returns:
        An interactive ``gr.DataFrame`` labelled ``'categories'`` whose value
        contains the matching rows, in their original order. The value always
        carries the columns of ``categories``, even when no row matches.
    """
    column_names = list(categories.columns)
    selected = [
        row for row in categories.to_dict("records") if row['topic'] in filters
    ]
    # Passing the column names explicitly keeps the table schema when nothing
    # matches; a DataFrame built from an empty list alone would have no columns.
    filtered = pd.DataFrame(selected, columns=column_names)
    return gr.DataFrame(label='categories', value=filtered, interactive=True)

### End
    
def reset_cate(df_categories):
    """Toggle the category table between the defaults and a single blank row.

    Args:
        df_categories: The category DataFrame currently shown.

    Returns:
        A one-row DataFrame of empty strings when ``df_categories`` equals the
        module-level default table ``df_cate``; otherwise a copy of ``df_cate``.
    """
    if df_categories.equals(df_cate):
        # The blank table uses the same column names as the category entries
        # ('experts', plural) so code reading those columns keeps working.
        return pd.DataFrame([['', '', '']], columns=['topic', 'description', 'experts'])
    # Hand back a copy so later edits never touch the module-level defaults.
    return df_cate.copy()


def load_data(file_obj):
    """Read an Excel workbook into a DataFrame.

    Args:
        file_obj: Whatever ``pd.read_excel`` accepts; presumably the file
            uploaded through Gradio (verify against the caller).

    Returns:
        The DataFrame produced by ``pd.read_excel``.
    """
    frame = pd.read_excel(file_obj)
    return frame


def initialize_models():
    """Instantiate and return the ``all-mpnet-base-v2`` SentenceTransformer."""
    return SentenceTransformer("all-mpnet-base-v2")


def generate_embeddings(df, model, Column):
    """Encode the text of ``Column`` for every row and store it in ``df['Embeddings']``.

    For each row whose ``Column`` value is a string, the text is passed to
    ``model.encode(..., convert_to_tensor=True)``. When the frame has a
    ``Title`` column and the row's title is a string, the title and a newline
    are prepended to the text first. Rows whose ``Column`` value is not a
    string get ``np.nan`` instead of an embedding.

    Args:
        df: DataFrame to process. It is modified in place: an ``Embeddings``
            column is added or overwritten.
        model: Object exposing ``encode(text, convert_to_tensor=True)``.
        Column: Name of the column holding the text to embed.

    Returns:
        The same ``df`` object, with the ``Embeddings`` column set.
    """
    # Whether a title column exists does not change per row, so check it once.
    has_title = 'Title' in df.columns

    embeddings_list = []
    for index, row in df.iterrows():
        text = row[Column]
        # isinstance (rather than an exact type comparison) also accepts
        # str subclasses.
        if not isinstance(text, str):
            embeddings_list.append(np.nan)
            continue

        print(index)  # progress trace: label of the row being encoded
        title = row["Title"] if has_title else None
        content = title + "\n" + text if isinstance(title, str) else text
        embeddings_list.append(model.encode(content, convert_to_tensor=True))

    df['Embeddings'] = embeddings_list
    return df


def process_categories(categories, model):
    """Build a category table with one embedding per category description.

    Args:
        categories: Data accepted by ``pd.DataFrame``; each row must provide a
            ``description`` field.
        model: Object exposing ``encode(text, convert_to_tensor=True)``.

    Returns:
        A DataFrame built from ``categories`` with an extra ``Embeddings``
        column holding the encoded description of each row.
    """
    def _embed_row(row):
        # Encode only the free-text description of the category.
        return model.encode(row['description'], convert_to_tensor=True)

    category_table = pd.DataFrame(categories)
    category_table['Embeddings'] = category_table.apply(_embed_row, axis=1)
    return category_table



def match_categories(df, category_df, treshold=0.45):
    """Score every embedded row of ``df`` against every category.

    One column per category topic is added to ``df``. A cell holds the cosine
    similarity between the row's embedding and the category's embedding when
    that similarity is strictly greater than ``treshold``, and 0 otherwise.
    Rows whose ``Embeddings`` value is not a ``torch.Tensor`` keep 0 everywhere.

    Args:
        df: DataFrame with an ``Embeddings`` column; modified in place.
        category_df: DataFrame with ``topic`` and ``Embeddings`` columns.
        treshold: Minimum similarity (exclusive) for a score to be recorded.

    Returns:
        The same ``df`` object, with the per-topic score columns added.
    """
    topics = list(category_df['topic'])
    for topic in topics:
        # Float zeros: the column later receives float scores, and writing a
        # float into an integer column is an upcasting setitem.
        df[topic] = 0.0

    if not topics:
        # Nothing to compare against; stacking an empty list would fail.
        return df

    # The category matrix is the same for every row, so build it once.
    category_matrix = torch.stack(list(category_df['Embeddings']), dim=0)
    # Rows and columns are addressed by position so the result does not depend
    # on the index labels of either frame.
    topic_positions = [df.columns.get_loc(topic) for topic in topics]

    for position, ebd_content in enumerate(df['Embeddings']):
        if not isinstance(ebd_content, torch.Tensor):
            continue
        cos_scores = util.cos_sim(ebd_content, category_matrix)[0]
        for j, score in enumerate(cos_scores):
            if score > treshold:
                df.iloc[position, topic_positions[j]] = float(score)
    return df

def save_data(df, filename):
    """Write ``df`` (without its ``Embeddings`` column) next to ``filename``.

    The output name is ``filename`` with ``_classified`` inserted just before
    the file extension, e.g. ``report.xlsx`` -> ``report_classified.xlsx``.
    Only the final extension is touched, so dots elsewhere in the path
    (directories, ``./`` prefixes, multi-dot names) are left alone.

    Args:
        df: DataFrame to export; must contain an ``Embeddings`` column.
        filename: Path of the source file, used to derive the output path.

    Returns:
        The path of the Excel file that was written.
    """
    import os  # local import: only this function needs it

    df = df.drop(columns=['Embeddings'])
    root, extension = os.path.splitext(filename)
    new_filename = f"{root}_classified{extension}"
    df.to_excel(new_filename, index=False)
    return new_filename

def classification(column, file_path, categories, treshold):
    """Run the whole pipeline: load, embed, score against categories, save.

    Args:
        column: Name of the column whose text is embedded.
        file_path: Path of the Excel file to classify; also used to derive the
            output file name.
        categories: Category data handed to ``process_categories``.
        treshold: Similarity threshold forwarded to ``match_categories``.

    Returns:
        A tuple ``(output_path, df)``: the value returned by ``save_data`` and
        the scored DataFrame.
    """
    documents = load_data(file_path)
    encoder = initialize_models()

    print('Generating Embeddings')
    documents = generate_embeddings(documents, encoder, column)
    print('Embeddings Generated')

    # Categories are embedded with the same model as the documents.
    scored = match_categories(
        documents,
        process_categories(categories, encoder),
        treshold=treshold,
    )

    output_path = save_data(scored, file_path)
    return output_path, scored

def download_cate(cate_df):
    """Export the category table to ``categories.xlsx`` and expose it for download.

    Args:
        cate_df: DataFrame of categories to export.

    Returns:
        A visible ``gr.File`` component pointing at the written workbook.
    """
    export_path = 'categories.xlsx'
    cate_df.to_excel(export_path)
    return gr.File(value=export_path, visible=True)