Spaces:
Sleeping
Sleeping
Update charts_advanced.py
Browse files- charts_advanced.py +65 -83
charts_advanced.py
CHANGED
@@ -4,63 +4,48 @@ from collections import Counter
|
|
4 |
import matplotlib.ticker as ticker
|
5 |
|
6 |
def category_chart(file_path):
|
7 |
-
plt.close('all')
|
8 |
-
# Define expert to specialty mapping
|
9 |
-
expert_specialties = {
|
10 |
-
"mireille": "Security Trust",
|
11 |
-
"khawla": "Network Security",
|
12 |
-
"guillaume": "Distributed Networks",
|
13 |
-
"vincent": "USIM Management",
|
14 |
-
"pierre": "Eco-Design",
|
15 |
-
"ly-thanh": "Trend Analysis",
|
16 |
-
"nicolas": "Satellite Networks",
|
17 |
-
"dorin": "Emergency Communication"
|
18 |
-
}
|
19 |
-
|
20 |
# Load the Excel file
|
21 |
-
|
22 |
|
23 |
-
#
|
24 |
-
|
25 |
-
|
26 |
|
27 |
-
|
28 |
-
specialties = experts.apply(lambda expert: expert_specialties.get(expert.strip(), "Other"))
|
29 |
|
30 |
-
#
|
31 |
-
|
|
|
|
|
|
|
32 |
|
33 |
# Convert to DataFrame for plotting
|
34 |
-
|
35 |
-
specialty_counts_df.columns = ['Specialty', 'Count']
|
36 |
|
37 |
# Plotting
|
38 |
-
|
39 |
-
fig, ax = plt.subplots(figsize=(14,
|
40 |
ax.set_facecolor('#222c52')
|
41 |
fig.patch.set_facecolor('#222c52')
|
42 |
|
43 |
-
#
|
44 |
-
|
45 |
-
specialty_counts_df.plot(kind='bar', x='Specialty', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.5, linewidth=5, legend=None)
|
46 |
|
47 |
-
# Set chart details
|
48 |
ax.xaxis.label.set_color('white')
|
49 |
ax.yaxis.label.set_color('white')
|
50 |
-
ax.tick_params(axis='x', colors='white', labelsize=
|
51 |
-
ax.tick_params(axis='y', colors='white', labelsize=
|
52 |
-
ax.set_title('
|
53 |
-
ax.set_xlabel('
|
54 |
ax.set_ylabel('Count', fontsize=14)
|
55 |
ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
|
56 |
ax.set_axisbelow(True)
|
57 |
|
58 |
for spine in ax.spines.values():
|
59 |
spine.set_color('white')
|
60 |
-
spine.set_linewidth(
|
61 |
ax.spines['right'].set_visible(False)
|
62 |
ax.spines['top'].set_visible(False)
|
63 |
-
|
64 |
return fig
|
65 |
|
66 |
|
@@ -108,17 +93,16 @@ def plot_glowing_line_with_dots_enhanced(ax, x, y, color, label, glow_size=10, b
|
|
108 |
|
109 |
def company_document_type(file_path, company_names):
|
110 |
plt.close('all')
|
111 |
-
# Convert company_names to a list if it's a string
|
112 |
if isinstance(company_names, str):
|
113 |
-
company_names = [name.strip() for name in company_names.split(',')]
|
114 |
|
115 |
df = pd.read_excel(file_path)
|
116 |
-
|
117 |
fig, ax = plt.subplots(figsize=(14, 8))
|
118 |
ax.set_facecolor('#222c52')
|
119 |
fig.patch.set_facecolor('#222c52')
|
120 |
|
121 |
-
colors = ['#08F7FE', '#FE53BB', '#fff236'] #
|
122 |
|
123 |
max_count = 0
|
124 |
for index, company_name in enumerate(company_names):
|
@@ -136,10 +120,10 @@ def company_document_type(file_path, company_names):
|
|
136 |
max_count = max(y_data)
|
137 |
|
138 |
ax.set_xticks(range(len(all_document_types)))
|
139 |
-
ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold')
|
140 |
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
|
141 |
ax.set_ylabel('Count', color='white')
|
142 |
-
ax.set_title('Document Types Contributed by Companies')
|
143 |
ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
|
144 |
ax.set_axisbelow(True)
|
145 |
|
@@ -152,68 +136,66 @@ def company_document_type(file_path, company_names):
|
|
152 |
ax.spines['right'].set_visible(False)
|
153 |
ax.spines['top'].set_visible(False)
|
154 |
ax.spines['left'].set_position(('data', 0))
|
155 |
-
plt.legend(facecolor='#222c52', edgecolor='white', fontsize=12)
|
156 |
|
157 |
return fig
|
158 |
|
159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
def chart_by_expert(file_path, expert_name):
|
162 |
plt.close('all')
|
163 |
# Load the Excel file
|
164 |
data = pd.read_excel(file_path)
|
165 |
-
|
|
|
166 |
parts = expert_name.split('/')
|
|
|
167 |
|
168 |
-
#
|
169 |
-
|
170 |
-
|
171 |
-
|
|
|
|
|
|
|
172 |
|
173 |
-
# Define merge entities mapping
|
174 |
merge_entities = {
|
175 |
"Nokia Shanghai Bell": "Nokia",
|
176 |
"Qualcomm Korea": "Qualcomm",
|
177 |
-
|
178 |
-
|
179 |
-
"Hughes Network Systems": "Hughes",
|
180 |
-
"HUGHES Network Systems": "Hughes",
|
181 |
-
"Hughes Network systems": "Hughes",
|
182 |
-
"HUGHES Network Systems Ltd": "Hughes",
|
183 |
-
"KT Corp.": "KT Corporation",
|
184 |
-
"LG Electronics Inc.": "LG Electronics",
|
185 |
-
"LG Uplus": "LG Electronics",
|
186 |
-
"OPPO (chongqing) Intelligence": "OPPO",
|
187 |
-
"Samsung Electronics GmbH": "Samsung",
|
188 |
-
"China Mobile International Ltd": "China Mobile",
|
189 |
-
"NOVAMINT": "Novamint",
|
190 |
-
"Eutelsat": "Eutelsat Group",
|
191 |
-
"Inmarsat Viasat": "Inmarsat",
|
192 |
-
"China Telecommunications": "China Telecom",
|
193 |
-
"SES S.A.": "SES",
|
194 |
-
"Ericsson GmbH": "Ericsson",
|
195 |
-
"JSAT": "SKY Perfect JSAT",
|
196 |
-
"NEC Europe Ltd": "NEC",
|
197 |
-
"Fraunhofer IIS": "Fraunhofer",
|
198 |
"Hugues Network Systems": "Hughes"
|
199 |
}
|
200 |
|
201 |
-
#
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
normalized_name = merge_entities.get(company.strip(), company.strip())
|
206 |
-
normalized.add(normalized_name)
|
207 |
-
return list(normalized)
|
208 |
|
209 |
-
#
|
|
|
210 |
sources = filtered_data['Source'].dropna()
|
211 |
split_sources = sources.apply(lambda x: normalize_companies(x.split(', '), merge_entities))
|
212 |
-
|
213 |
-
# Flatten the list of lists while applying the merge rules
|
214 |
all_sources = [company for sublist in split_sources for company in sublist]
|
215 |
|
216 |
-
# Count occurrences
|
217 |
source_counts = Counter(all_sources)
|
218 |
top_10_sources = source_counts.most_common(10)
|
219 |
|
@@ -221,7 +203,6 @@ def chart_by_expert(file_path, expert_name):
|
|
221 |
top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
|
222 |
|
223 |
# Plotting
|
224 |
-
#plt.style.use('dark_background')
|
225 |
fig, ax = plt.subplots(figsize=(14, 11))
|
226 |
ax.set_facecolor('#222c52')
|
227 |
fig.patch.set_facecolor('#222c52')
|
@@ -235,7 +216,7 @@ def chart_by_expert(file_path, expert_name):
|
|
235 |
ax.yaxis.label.set_color('white')
|
236 |
ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
|
237 |
ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
|
238 |
-
ax.set_title(f"Top 10
|
239 |
ax.set_xlabel('Company', fontsize=14)
|
240 |
ax.set_ylabel('Count', fontsize=14)
|
241 |
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
|
@@ -252,6 +233,7 @@ def chart_by_expert(file_path, expert_name):
|
|
252 |
|
253 |
|
254 |
|
|
|
255 |
# @title Top 10 des entreprises en termes de publications
|
256 |
|
257 |
|
@@ -313,7 +295,6 @@ def generate_company_chart(file_path):
|
|
313 |
top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
|
314 |
|
315 |
# Plotting
|
316 |
-
#plt.style.use('dark_background')
|
317 |
fig, ax = plt.subplots(figsize=(14, 12))
|
318 |
ax.set_facecolor('#222c52')
|
319 |
fig.patch.set_facecolor('#222c52')
|
@@ -341,3 +322,4 @@ def generate_company_chart(file_path):
|
|
341 |
|
342 |
#plt.show()
|
343 |
return fig
|
|
|
|
4 |
import matplotlib.ticker as ticker
|
5 |
|
6 |
def category_chart(file_path):
    """Build a bar chart of how often each topic appears in an Excel workbook.

    The workbook is read with ``pd.read_excel`` and must contain a 'Topic'
    column. A cell may hold several comma-separated topics; each one is
    counted separately after surrounding whitespace is removed.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_excel``.

    Returns:
        The matplotlib ``Figure`` holding the bar chart, bars ordered by
        descending count.

    Raises:
        ValueError: If the 'Topic' column is absent, entirely null, or
            contains no non-blank topic.
    """
    # Load the Excel file
    df = pd.read_excel(file_path)

    # Ensure the 'Topic' column exists and has at least one value
    if 'Topic' not in df.columns or df['Topic'].isnull().all():
        raise ValueError("The 'Topic' column is missing or empty.")

    # Cast to str: a numeric cell would otherwise become NaN under
    # .str.split and break the flattening step.
    topic_cells = df['Topic'].dropna().astype(str)

    # Split multiple topics and flatten; strip BEFORE testing for emptiness
    # so fragments like " " (from "a, ,b" or a trailing comma) are dropped.
    all_topics = []
    for cell in topic_cells:
        for fragment in cell.split(','):
            topic = fragment.strip()
            if topic:
                all_topics.append(topic)

    if not all_topics:
        raise ValueError("The 'Topic' column is missing or empty.")

    # Count occurrences; most_common() yields (topic, count) pairs sorted by
    # descending count, with ties kept in first-seen order.
    topic_counts = Counter(all_topics)
    topic_counts_df = pd.DataFrame(topic_counts.most_common(), columns=['Topic', 'Count'])

    # Plotting
    plt.close('all')
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    # Alternate the two accent colours from bar to bar
    colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(topic_counts_df))]
    topic_counts_df.plot(kind='bar', x='Topic', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.7, linewidth=2, legend=None)

    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=10, direction='out', length=6, width=2, rotation=45)
    ax.tick_params(axis='y', colors='white', labelsize=10, direction='out', length=6, width=2)
    ax.set_title('Topic Frequency Distribution', color='white', fontsize=16)
    ax.set_xlabel('Topic', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)

    # White left/bottom spines only; the right and top spines are hidden
    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(1)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    return fig
|
50 |
|
51 |
|
|
|
93 |
|
94 |
def company_document_type(file_path, company_names):
|
95 |
plt.close('all')
|
|
|
96 |
if isinstance(company_names, str):
|
97 |
+
company_names = [name.strip() for name in company_names.split(',')]
|
98 |
|
99 |
df = pd.read_excel(file_path)
|
100 |
+
|
101 |
fig, ax = plt.subplots(figsize=(14, 8))
|
102 |
ax.set_facecolor('#222c52')
|
103 |
fig.patch.set_facecolor('#222c52')
|
104 |
|
105 |
+
colors = ['#08F7FE', '#FE53BB', '#fff236'] # Add more colors if necessary
|
106 |
|
107 |
max_count = 0
|
108 |
for index, company_name in enumerate(company_names):
|
|
|
120 |
max_count = max(y_data)
|
121 |
|
122 |
ax.set_xticks(range(len(all_document_types)))
|
123 |
+
ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold', color='white')
|
124 |
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
|
125 |
ax.set_ylabel('Count', color='white')
|
126 |
+
ax.set_title('Document Types Contributed by Companies', color='white')
|
127 |
ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
|
128 |
ax.set_axisbelow(True)
|
129 |
|
|
|
136 |
ax.spines['right'].set_visible(False)
|
137 |
ax.spines['top'].set_visible(False)
|
138 |
ax.spines['left'].set_position(('data', 0))
|
139 |
+
plt.legend(facecolor='#222c52', edgecolor='white', fontsize=12, labelcolor='white')
|
140 |
|
141 |
return fig
|
142 |
|
143 |
|
144 |
+
def get_expert(file_path):
    """Collect the distinct expert names listed in an Excel workbook.

    The workbook is read with ``pd.read_excel`` and must contain an 'Expert'
    column. A cell may list several comma-separated experts; names are
    stripped of surrounding whitespace and de-duplicated.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_excel``.

    Returns:
        The result of ``gr.update(choices=...)`` where ``choices`` is the
        alphabetically sorted list of unique expert names.

    Raises:
        ValueError: If the 'Expert' column is missing.
    """
    # Load the Excel file
    df = pd.read_excel(file_path)

    # Ensure the 'Expert' column exists
    if 'Expert' not in df.columns:
        raise ValueError("The 'Expert' column is missing from the provided file.")

    # Cast to str so a non-text cell does not raise on .split(); unique()
    # avoids re-splitting identical cells.
    expert_cells = df['Expert'].dropna().astype(str).unique()

    # Split every cell, trim each name, and drop blanks left by stray or
    # trailing commas so they never show up as an empty choice.
    unique_experts = set()
    for cell in expert_cells:
        for fragment in cell.split(','):
            expert = fragment.strip()
            if expert:
                unique_experts.add(expert)

    # sorted() already returns a list
    return gr.update(choices=sorted(unique_experts))
|
160 |
|
161 |
def chart_by_expert(file_path, expert_name):
|
162 |
plt.close('all')
|
163 |
# Load the Excel file
|
164 |
data = pd.read_excel(file_path)
|
165 |
+
|
166 |
+
# Normalize the expert's name if it follows a specific format; otherwise, adjust accordingly
|
167 |
parts = expert_name.split('/')
|
168 |
+
name = parts[1].strip() if len(parts) > 1 else expert_name.strip()
|
169 |
|
170 |
+
# Normalize function for companies, similar to the original code
|
171 |
+
def normalize_companies(company_list, merge_entities):
    """Map company names to their canonical form and drop duplicates.

    Args:
        company_list: Iterable of raw company name strings.
        merge_entities: Dict mapping a known name variant to its canonical
            name; names not present in the dict are kept as-is.

    Returns:
        A list of distinct canonical names in first-seen order. Names that
        are empty after stripping whitespace are skipped.
    """
    # A dict keeps insertion order, so the result is deterministic
    # (a set would yield an arbitrary order between runs).
    normalized = {}
    for company in company_list:
        raw_name = company.strip()
        if not raw_name:
            continue
        normalized[merge_entities.get(raw_name, raw_name)] = None
    return list(normalized)
|
177 |
|
178 |
+
# Define merge entities mapping, as provided
|
179 |
# Company-name variants seen in the 'Source' column, mapped to the single
# canonical name they should be counted under.
# NOTE(review): restored from the previous revision of this mapping, which had
# been reduced to a placeholder; confirm against history that no alias is missing.
merge_entities = {
    "Nokia Shanghai Bell": "Nokia",
    "Qualcomm Korea": "Qualcomm",
    "Hughes Network Systems": "Hughes",
    "HUGHES Network Systems": "Hughes",
    "Hughes Network systems": "Hughes",
    "HUGHES Network Systems Ltd": "Hughes",
    "KT Corp.": "KT Corporation",
    "LG Electronics Inc.": "LG Electronics",
    "LG Uplus": "LG Electronics",
    "OPPO (chongqing) Intelligence": "OPPO",
    "Samsung Electronics GmbH": "Samsung",
    "China Mobile International Ltd": "China Mobile",
    "NOVAMINT": "Novamint",
    "Eutelsat": "Eutelsat Group",
    "Inmarsat Viasat": "Inmarsat",
    "China Telecommunications": "China Telecom",
    "SES S.A.": "SES",
    "Ericsson GmbH": "Ericsson",
    "JSAT": "SKY Perfect JSAT",
    "NEC Europe Ltd": "NEC",
    "Fraunhofer IIS": "Fraunhofer",
    "Hugues Network Systems": "Hughes",
}
|
186 |
|
187 |
+
# Adjust data processing to handle multiple experts and sources
|
188 |
+
# Flatten and normalize the source field across relevant rows
|
189 |
+
data['ExpertsList'] = data['Expert'].dropna().apply(lambda x: [expert.strip() for expert in x.split(',')])
|
190 |
+
data_exploded = data.explode('ExpertsList')
|
|
|
|
|
|
|
191 |
|
192 |
+
# Filter the data for the specified expert and handle multiple sources
|
193 |
+
filtered_data = data_exploded[data_exploded['ExpertsList'].str.contains(name, case=False, na=False)]
|
194 |
sources = filtered_data['Source'].dropna()
|
195 |
split_sources = sources.apply(lambda x: normalize_companies(x.split(', '), merge_entities))
|
|
|
|
|
196 |
all_sources = [company for sublist in split_sources for company in sublist]
|
197 |
|
198 |
+
# Count occurrences and get the top 10
|
199 |
source_counts = Counter(all_sources)
|
200 |
top_10_sources = source_counts.most_common(10)
|
201 |
|
|
|
203 |
top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
|
204 |
|
205 |
# Plotting
|
|
|
206 |
fig, ax = plt.subplots(figsize=(14, 11))
|
207 |
ax.set_facecolor('#222c52')
|
208 |
fig.patch.set_facecolor('#222c52')
|
|
|
216 |
ax.yaxis.label.set_color('white')
|
217 |
ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
|
218 |
ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
|
219 |
+
ax.set_title(f"Top 10 Contributors for Expert '{expert_name}'", color='white', fontsize=16)
|
220 |
ax.set_xlabel('Company', fontsize=14)
|
221 |
ax.set_ylabel('Count', fontsize=14)
|
222 |
ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
|
|
|
233 |
|
234 |
|
235 |
|
236 |
+
|
237 |
# @title Top 10 des entreprises en termes de publications
|
238 |
|
239 |
|
|
|
295 |
top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
|
296 |
|
297 |
# Plotting
|
|
|
298 |
fig, ax = plt.subplots(figsize=(14, 12))
|
299 |
ax.set_facecolor('#222c52')
|
300 |
fig.patch.set_facecolor('#222c52')
|
|
|
322 |
|
323 |
#plt.show()
|
324 |
return fig
|
325 |
+
|