# Standard_Intelligence_Dev

# Sleeping

# File size: 12,840 Bytes

import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
import matplotlib.ticker as ticker
import gradio as gr

def category_chart(file_path):
    """Build a bar chart of how often each topic occurs in an Excel sheet.

    A cell of the 'Topic' column may hold several comma-separated topics.
    Each topic is stripped of surrounding whitespace and counted once per
    occurrence; blank tokens (e.g. from ``"a, ,b"`` or a trailing comma) are
    ignored.

    Args:
        file_path: Passed unchanged to ``pandas.read_excel``.

    Returns:
        The matplotlib figure holding the bar chart, topics ordered by
        decreasing count.

    Raises:
        ValueError: If the 'Topic' column is missing, entirely null, or
            contains no non-blank topic.
    """
    df = pd.read_excel(file_path)

    if 'Topic' not in df.columns or df['Topic'].isnull().all():
        raise ValueError("The 'Topic' column is missing or empty.")

    # Strip BEFORE filtering so whitespace-only tokens are not counted as an
    # empty-named topic; str() keeps non-text cells (e.g. numbers) from failing.
    topic_counts = Counter(
        topic
        for cell in df['Topic'].dropna()
        for topic in (part.strip() for part in str(cell).split(','))
        if topic
    )
    if not topic_counts:
        raise ValueError("The 'Topic' column is missing or empty.")

    # most_common() sorts by descending count and keeps first-seen order for ties
    topic_counts_df = pd.DataFrame(topic_counts.most_common(), columns=['Topic', 'Count'])

    # Plotting
    plt.close('all')
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    # Alternate the two accent colors from bar to bar
    colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(topic_counts_df))]
    topic_counts_df.plot(kind='bar', x='Topic', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.7, linewidth=2, legend=None)

    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=10, direction='out', length=6, width=2, rotation=45)
    ax.tick_params(axis='y', colors='white', labelsize=10, direction='out', length=6, width=2)
    ax.set_title('Topic Frequency Distribution', color='white', fontsize=16)
    ax.set_xlabel('Topic', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)  # keep the grid behind the bars

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(1)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    return fig




def status_chart(file_path):
    """Draw a pie chart of the value frequencies in the 'Status' column.

    Args:
        file_path: Passed unchanged to ``pandas.read_excel``.

    Returns:
        The matplotlib figure holding the pie chart and its legend.
    """
    # Discard figures left over from earlier calls before building a new one
    plt.close('all')

    frame = pd.read_excel(file_path)
    counts = frame['Status'].value_counts()

    # Eight-digit hex colors; the last two digits are the alpha channel
    # ('80' is roughly 50% opacity, 'de' and 'bf' are more opaque).
    palette = ['#08F7FE80', '#FE53BB80', '#fff236de', '#90ff00bf']
    background = '#222c52'

    fig, ax = plt.subplots()
    ax.set_facecolor(background)
    fig.patch.set_facecolor(background)

    wedges, _, autotexts = ax.pie(
        counts,
        colors=palette,
        startangle=90,
        autopct='%1.1f%%',
        wedgeprops={'edgecolor': 'white', 'linewidth': 1.5},
    )

    ax.set_title('Document Status Distribution', color='white')
    ax.set_ylabel('')  # no y-axis caption on the pie

    # Legend sits to the right of the pie, one entry per status value
    ax.legend(wedges, counts.index, title="Document Status", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))

    # Style the percentage labels drawn inside the wedges
    plt.setp(autotexts, size=8, weight="bold", color="white")

    return fig



def plot_glowing_line_with_dots_enhanced(ax, x, y, color, label, glow_size=10, base_linewidth=3, markersize=8):
    """Plot a marked line over a stack of faint strokes that act as a glow.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x, y: Data forwarded to every ``ax.plot`` call.
        color: Color used for both the glow strokes and the main line.
        label: Label attached to the main line only.
        glow_size: Number of glow strokes drawn underneath the main line.
        base_linewidth: Width of the main line; glow stroke ``k`` is
            ``base_linewidth * k * 0.5`` wide.
        markersize: Size of the circular markers on the main line.
    """
    # Glow layers: width and opacity both grow with the layer number, the
    # opacity being capped at 1.0 before it is scaled down to 10%.
    for layer in range(1, glow_size + 1):
        opacity = min((1.0 / glow_size) * (layer / (glow_size / 2)), 1.0)
        ax.plot(
            x,
            y,
            color=color,
            linewidth=base_linewidth * layer * 0.5,
            alpha=opacity * 0.1,
        )

    # The visible line itself, drawn last so it sits on top of the glow
    ax.plot(
        x,
        y,
        color=color,
        linewidth=base_linewidth,
        marker='o',
        linestyle='-',
        label=label,
        markersize=markersize,
    )

def company_document_type(file_path, company_names):
    """Plot, per company, how many documents of each type it contributed.

    A row belongs to a company when its 'Source' cell contains the company
    name (case-insensitive, literal substring match). One glowing line is
    drawn per company over the document types found in the 'Type' column.

    Args:
        file_path: Passed unchanged to ``pandas.read_excel``.
        company_names: Either a comma-separated string or an iterable of
            company names. Blank names are ignored; if none remain, the chart
            is returned with its axes but without any series or legend.

    Returns:
        The matplotlib figure holding the chart.
    """
    plt.close('all')
    if isinstance(company_names, str):
        company_names = company_names.split(',')
    # A blank name is a substring of every source and would select all rows
    company_names = [name.strip() for name in company_names if name and name.strip()]

    df = pd.read_excel(file_path)

    # Loop-invariant, and needed after the loop even when no company is given.
    # Missing types are dropped so they do not end up on the categorical axis.
    all_document_types = df['Type'].dropna().unique()

    fig, ax = plt.subplots(figsize=(14, 8))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    colors = ['#08F7FE', '#FE53BB', '#fff236']  # Add more colors if necessary

    max_count = 0
    for index, company_name in enumerate(company_names):
        color = colors[index % len(colors)]  # cycle when there are more companies than colors

        # regex=False: names such as "KT Corp." or "OPPO (chongqing)" must be
        # matched literally, not interpreted as regular expressions.
        mask = df['Source'].str.contains(company_name, case=False, na=False, regex=False)
        document_counts = df.loc[mask, 'Type'].value_counts()
        # Give every company the same x positions, with 0 for absent types
        document_counts = document_counts.reindex(all_document_types, fill_value=0)

        x_data = document_counts.index
        y_data = document_counts.values
        ax.fill_between(x_data, y_data, -0.2, color=color, alpha=0.1)
        plot_glowing_line_with_dots_enhanced(ax, x_data, y_data, color, company_name, base_linewidth=4)

        if len(y_data):
            max_count = max(max_count, int(y_data.max()))

    ax.set_xticks(range(len(all_document_types)))
    ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold', color='white')
    ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    ax.set_ylabel('Count', color='white')
    ax.set_title('Document Types Contributed by Companies', color='white')
    ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)

    # Leave headroom above the tallest point; -0.2 matches the fill baseline
    ax.set_ylim(-0.2, max_count + 1)

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(2)

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_position(('data', 0))

    # A legend without entries only triggers a matplotlib warning
    if company_names:
        ax.legend(facecolor='#222c52', edgecolor='white', fontsize=12, labelcolor='white')

    return fig


def get_expert(file_path):
    """Collect the distinct expert names of a sheet as dropdown choices.

    A cell of the 'Expert' column may list several comma-separated experts.
    Names are stripped of surrounding whitespace; blank tokens (e.g. from a
    trailing or doubled comma) are ignored.

    Args:
        file_path: Passed unchanged to ``pandas.read_excel``.

    Returns:
        The result of ``gr.update(choices=...)`` with the alphabetically
        sorted unique expert names.

    Raises:
        ValueError: If the sheet has no 'Expert' column.
    """
    df = pd.read_excel(file_path)

    if 'Expert' not in df.columns:
        raise ValueError("The 'Expert' column is missing from the provided file.")

    # str() keeps non-text cells from failing on .split; the final filter
    # prevents an empty string from showing up as a selectable choice.
    unique_experts = {
        expert
        for cell in df['Expert'].dropna().unique()
        for expert in (part.strip() for part in str(cell).split(','))
        if expert
    }

    return gr.update(choices=sorted(unique_experts))

def chart_by_expert(file_path, expert_name):
    """Plot the ten companies appearing most often on one expert's documents.

    A row is attributed to the expert when any comma-separated entry of its
    'Expert' cell contains the expert's name (case-insensitive, literal
    substring match). The companies of the row's 'Source' cell are then
    normalized through the merge mapping and counted once per row.

    Args:
        file_path: Passed unchanged to ``pandas.read_excel``.
        expert_name: Name to look for. When it contains a '/', only the part
            between the first and second slash is used for matching; the
            full string is still shown in the chart title.

    Returns:
        The matplotlib figure holding the bar chart. When no row matches, the
        figure is returned with styled axes but no bars.
    """
    plt.close('all')
    data = pd.read_excel(file_path)

    parts = expert_name.split('/')
    name = parts[1].strip() if len(parts) > 1 else expert_name.strip()

    # Spelling/affiliate variants folded into one company name. Kept identical
    # to the mapping used by generate_company_chart so both charts agree.
    merge_entities = {
        "Nokia Shanghai Bell": "Nokia",
        "Qualcomm Korea": "Qualcomm",
        "Qualcomm Incorporated": "Qualcomm",
        "Huawei Technologies R&D UK": "Huawei",
        "Hughes Network Systems": "Hughes",
        "HUGHES Network Systems": "Hughes",
        "Hughes Network systems": "Hughes",
        "HUGHES Network Systems Ltd": "Hughes",
        "KT Corp.": "KT Corporation",
        "Deutsche Telekom AG": "Deutsche Telekom",
        "LG Electronics Inc.": "LG Electronics",
        "LG Uplus": "LG Electronics",
        "OPPO (chongqing) Intelligence": "OPPO",
        "Samsung Electronics GmbH": "Samsung",
        "China Mobile International Ltd": "China Mobile",
        "NOVAMINT": "Novamint",
        "Eutelsat": "Eutelsat Group",
        "Inmarsat Viasat": "Inmarsat",
        "China Telecommunications": "China Telecom",
        "SES S.A.": "SES",
        "Ericsson GmbH": "Ericsson",
        "JSAT": "SKY Perfect JSAT",
        "NEC Europe Ltd": "NEC",
        "Fraunhofer IIS": "Fraunhofer",
        "Hugues Network Systems": "Hughes"
    }

    needle = name.lower()
    source_counts = Counter()
    for experts_cell, sources_cell in zip(data['Expert'], data['Source']):
        if pd.isna(experts_cell) or pd.isna(sources_cell):
            continue

        # Plain substring test: the name is never interpreted as a regex, and
        # a row is taken once even if several of its experts match.
        listed_experts = (expert.strip().lower() for expert in str(experts_cell).split(','))
        if not any(needle in expert for expert in listed_experts):
            continue

        # Split on ',' (with or without a following space), drop blank tokens,
        # and use a set so a company is counted once per row after merging.
        companies = {
            merge_entities.get(company, company)
            for company in (part.strip() for part in str(sources_cell).split(','))
            if company
        }
        # sorted() keeps tie order in most_common() independent of set ordering
        source_counts.update(sorted(companies))

    top_10_df = pd.DataFrame(source_counts.most_common(10), columns=['Company', 'Count'])

    # Plotting
    fig, ax = plt.subplots(figsize=(14, 11))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    # pandas refuses to plot a frame without numeric data, so skip the bars
    # entirely when nothing matched.
    if not top_10_df.empty:
        colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(top_10_df))]
        top_10_df.plot(kind='bar', x='Company', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.5, linewidth=5, legend=None)

    # Set chart details
    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
    ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
    ax.set_title(f"Top 10 Contributors for Expert '{expert_name}'", color='white', fontsize=16)
    ax.set_xlabel('Company', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(2)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    return fig




# @title Top 10 companies by number of publications



def generate_company_chart(file_path):
    """Plot the ten companies that appear most often in the 'Source' column.

    Each 'Source' cell may list several comma-separated companies. Names are
    stripped, blank tokens are dropped, known variants are folded together
    through the merge mapping, and a company is counted at most once per cell.

    Args:
        file_path: Passed unchanged to ``pandas.read_excel``.

    Returns:
        The matplotlib figure holding the bar chart. When the column holds no
        company at all, the figure is returned with styled axes but no bars.
    """
    # NOTE(review): unlike the other chart builders in this file, this one does
    # not call plt.close('all'); the call was disabled in the original code.
    # Confirm whether that is intentional before re-enabling it.

    # Spelling/affiliate variants folded into one company name
    merge_entities = {
        "Nokia Shanghai Bell": "Nokia",
        "Qualcomm Korea": "Qualcomm",
        "Qualcomm Incorporated": "Qualcomm",
        "Huawei Technologies R&D UK": "Huawei",
        "Hughes Network Systems": "Hughes",
        "HUGHES Network Systems": "Hughes",
        "Hughes Network systems": "Hughes",
        "HUGHES Network Systems Ltd": "Hughes",
        "KT Corp.": "KT Corporation",
        "Deutsche Telekom AG": "Deutsche Telekom",
        "LG Electronics Inc.": "LG Electronics",
        "LG Uplus": "LG Electronics",
        "OPPO (chongqing) Intelligence": "OPPO",
        "Samsung Electronics GmbH": "Samsung",
        "China Mobile International Ltd": "China Mobile",
        "NOVAMINT": "Novamint",
        "Eutelsat": "Eutelsat Group",
        "Inmarsat Viasat": "Inmarsat",
        "China Telecommunications": "China Telecom",
        "SES S.A.": "SES",
        "Ericsson GmbH": "Ericsson",
        "JSAT": "SKY Perfect JSAT",
        "NEC Europe Ltd": "NEC",
        "Fraunhofer IIS": "Fraunhofer",
        "Hugues Network Systems": "Hughes"
    }

    data = pd.read_excel(file_path)

    source_counts = Counter()
    for cell in data['Source'].dropna():
        # Split on ',' (with or without a following space) and drop blank
        # tokens so "A,B" is two companies and "A, " does not count "".
        # The set avoids counting a company twice within the same cell.
        companies = {
            merge_entities.get(company, company)
            for company in (part.strip() for part in str(cell).split(','))
            if company
        }
        # sorted() keeps tie order in most_common() independent of set ordering
        source_counts.update(sorted(companies))

    top_10_df = pd.DataFrame(source_counts.most_common(10), columns=['Company', 'Count'])

    # Plotting
    fig, ax = plt.subplots(figsize=(14, 12))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    # pandas refuses to plot a frame without numeric data, so skip the bars
    # entirely when there is nothing to show.
    if not top_10_df.empty:
        colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(top_10_df))]
        top_10_df.plot(kind='bar', x='Company', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.5, linewidth=5, legend=None)

    # Set chart details
    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=16, direction='out', length=6, width=2, rotation=37)
    ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
    ax.set_title('Top 10 Contributors: Ranking Company Contributions', color='white', fontsize=16)
    ax.set_xlabel('Company', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(2)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    return fig