MaksG committed on
Commit
779436a
1 Parent(s): 7e6ba94

Update charts_advanced.py

Browse files
Files changed (1) hide show
  1. charts_advanced.py +65 -83
charts_advanced.py CHANGED
@@ -4,63 +4,48 @@ from collections import Counter
4
  import matplotlib.ticker as ticker
5
 
6
  def category_chart(file_path):
7
- plt.close('all')
8
- # Define expert to specialty mapping
9
- expert_specialties = {
10
- "mireille": "Security Trust",
11
- "khawla": "Network Security",
12
- "guillaume": "Distributed Networks",
13
- "vincent": "USIM Management",
14
- "pierre": "Eco-Design",
15
- "ly-thanh": "Trend Analysis",
16
- "nicolas": "Satellite Networks",
17
- "dorin": "Emergency Communication"
18
- }
19
-
20
  # Load the Excel file
21
- data = pd.read_excel(file_path)
22
 
23
- # Assuming experts are listed in a column named 'Experts'
24
- # This part might need to be adjusted based on the actual structure of your Excel file
25
- experts = data['Expert'].dropna()
26
 
27
- # Map experts to their specialties
28
- specialties = experts.apply(lambda expert: expert_specialties.get(expert.strip(), "Other"))
29
 
30
- # Count occurrences
31
- specialty_counts = specialties.value_counts()
 
 
 
32
 
33
  # Convert to DataFrame for plotting
34
- specialty_counts_df = specialty_counts.reset_index()
35
- specialty_counts_df.columns = ['Specialty', 'Count']
36
 
37
  # Plotting
38
-
39
- fig, ax = plt.subplots(figsize=(14, 14))
40
  ax.set_facecolor('#222c52')
41
  fig.patch.set_facecolor('#222c52')
42
 
43
- # Alternating colors for the bars
44
- colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(specialty_counts_df))]
45
- specialty_counts_df.plot(kind='bar', x='Specialty', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.5, linewidth=5, legend=None)
46
 
47
- # Set chart details
48
  ax.xaxis.label.set_color('white')
49
  ax.yaxis.label.set_color('white')
50
- ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=42)
51
- ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
52
- ax.set_title('Most Used Expert Specialties', color='white', fontsize=16)
53
- ax.set_xlabel('Specialty', fontsize=14)
54
  ax.set_ylabel('Count', fontsize=14)
55
  ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
56
  ax.set_axisbelow(True)
57
 
58
  for spine in ax.spines.values():
59
  spine.set_color('white')
60
- spine.set_linewidth(2)
61
  ax.spines['right'].set_visible(False)
62
  ax.spines['top'].set_visible(False)
63
-
64
  return fig
65
 
66
 
@@ -108,17 +93,16 @@ def plot_glowing_line_with_dots_enhanced(ax, x, y, color, label, glow_size=10, b
108
 
109
  def company_document_type(file_path, company_names):
110
  plt.close('all')
111
- # Convert company_names to a list if it's a string
112
  if isinstance(company_names, str):
113
- company_names = [name.strip() for name in company_names.split(',')] # Ensure it's a list even for single company name
114
 
115
  df = pd.read_excel(file_path)
116
-
117
  fig, ax = plt.subplots(figsize=(14, 8))
118
  ax.set_facecolor('#222c52')
119
  fig.patch.set_facecolor('#222c52')
120
 
121
- colors = ['#08F7FE', '#FE53BB', '#fff236'] # Assign more colors for more companies
122
 
123
  max_count = 0
124
  for index, company_name in enumerate(company_names):
@@ -136,10 +120,10 @@ def company_document_type(file_path, company_names):
136
  max_count = max(y_data)
137
 
138
  ax.set_xticks(range(len(all_document_types)))
139
- ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold')
140
  ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
141
  ax.set_ylabel('Count', color='white')
142
- ax.set_title('Document Types Contributed by Companies')
143
  ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
144
  ax.set_axisbelow(True)
145
 
@@ -152,68 +136,66 @@ def company_document_type(file_path, company_names):
152
  ax.spines['right'].set_visible(False)
153
  ax.spines['top'].set_visible(False)
154
  ax.spines['left'].set_position(('data', 0))
155
- plt.legend(facecolor='#222c52', edgecolor='white', fontsize=12)
156
 
157
  return fig
158
 
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  def chart_by_expert(file_path, expert_name):
162
  plt.close('all')
163
  # Load the Excel file
164
  data = pd.read_excel(file_path)
165
-
 
166
  parts = expert_name.split('/')
 
167
 
168
- # The name would be the second part, trim spaces
169
- name = parts[1].strip()
170
- # Filter data for the specified expert
171
- filtered_data = data[data['Expert'] == name.lower()]
 
 
 
172
 
173
- # Define merge entities mapping
174
  merge_entities = {
175
  "Nokia Shanghai Bell": "Nokia",
176
  "Qualcomm Korea": "Qualcomm",
177
- "Qualcomm Incorporated": "Qualcomm",
178
- "Huawei Technologies R&D UK": "Huawei",
179
- "Hughes Network Systems": "Hughes",
180
- "HUGHES Network Systems": "Hughes",
181
- "Hughes Network systems": "Hughes",
182
- "HUGHES Network Systems Ltd": "Hughes",
183
- "KT Corp.": "KT Corporation",
184
- "LG Electronics Inc.": "LG Electronics",
185
- "LG Uplus": "LG Electronics",
186
- "OPPO (chongqing) Intelligence": "OPPO",
187
- "Samsung Electronics GmbH": "Samsung",
188
- "China Mobile International Ltd": "China Mobile",
189
- "NOVAMINT": "Novamint",
190
- "Eutelsat": "Eutelsat Group",
191
- "Inmarsat Viasat": "Inmarsat",
192
- "China Telecommunications": "China Telecom",
193
- "SES S.A.": "SES",
194
- "Ericsson GmbH": "Ericsson",
195
- "JSAT": "SKY Perfect JSAT",
196
- "NEC Europe Ltd": "NEC",
197
- "Fraunhofer IIS": "Fraunhofer",
198
  "Hugues Network Systems": "Hughes"
199
  }
200
 
201
- # Normalize company names within each cell
202
- def normalize_companies(company_list, merge_entities):
203
- normalized = set() # Use a set to avoid duplicates within the same cell
204
- for company in company_list:
205
- normalized_name = merge_entities.get(company.strip(), company.strip())
206
- normalized.add(normalized_name)
207
- return list(normalized)
208
 
209
- # Prepare the filtered data
 
210
  sources = filtered_data['Source'].dropna()
211
  split_sources = sources.apply(lambda x: normalize_companies(x.split(', '), merge_entities))
212
-
213
- # Flatten the list of lists while applying the merge rules
214
  all_sources = [company for sublist in split_sources for company in sublist]
215
 
216
- # Count occurrences
217
  source_counts = Counter(all_sources)
218
  top_10_sources = source_counts.most_common(10)
219
 
@@ -221,7 +203,6 @@ def chart_by_expert(file_path, expert_name):
221
  top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
222
 
223
  # Plotting
224
- #plt.style.use('dark_background')
225
  fig, ax = plt.subplots(figsize=(14, 11))
226
  ax.set_facecolor('#222c52')
227
  fig.patch.set_facecolor('#222c52')
@@ -235,7 +216,7 @@ def chart_by_expert(file_path, expert_name):
235
  ax.yaxis.label.set_color('white')
236
  ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
237
  ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
238
- ax.set_title(f"Top 10 Cotributors for Expert '{expert_name}'", color='white', fontsize=16)
239
  ax.set_xlabel('Company', fontsize=14)
240
  ax.set_ylabel('Count', fontsize=14)
241
  ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
@@ -252,6 +233,7 @@ def chart_by_expert(file_path, expert_name):
252
 
253
 
254
 
 
255
  # @title Top 10 des entreprises en termes de publications
256
 
257
 
@@ -313,7 +295,6 @@ def generate_company_chart(file_path):
313
  top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
314
 
315
  # Plotting
316
- #plt.style.use('dark_background')
317
  fig, ax = plt.subplots(figsize=(14, 12))
318
  ax.set_facecolor('#222c52')
319
  fig.patch.set_facecolor('#222c52')
@@ -341,3 +322,4 @@ def generate_company_chart(file_path):
341
 
342
  #plt.show()
343
  return fig
 
 
4
  import matplotlib.ticker as ticker
5
 
def category_chart(file_path):
    """Plot how often each topic occurs in the 'Topic' column of an Excel file.

    A cell of the 'Topic' column may list several topics separated by commas;
    every topic is counted on its own and the totals are drawn as a bar chart,
    most frequent topic first.

    Args:
        file_path: Anything ``pd.read_excel`` accepts as its first argument.

    Returns:
        The matplotlib figure holding the bar chart.

    Raises:
        ValueError: If the 'Topic' column is missing, entirely null, or
            contains no usable topic text.
    """
    # Load the Excel file
    df = pd.read_excel(file_path)

    if 'Topic' not in df.columns or df['Topic'].isnull().all():
        raise ValueError("The 'Topic' column is missing or empty.")

    # Keep only filled cells; cast to str so a non-text cell (e.g. a number)
    # cannot break the comma split below.
    topic_cells = df['Topic'].dropna().astype(str)

    # Strip BEFORE filtering: whitespace-only fragments such as the middle of
    # "A, ,B" or a trailing ", " must not be counted as an empty-named topic.
    all_topics = [
        topic
        for cell in topic_cells
        for topic in (part.strip() for part in cell.split(','))
        if topic
    ]
    if not all_topics:
        # Cells exist but hold only separators/whitespace: nothing to plot.
        raise ValueError("The 'Topic' column is missing or empty.")

    # Counter.most_common() already yields (topic, count) pairs ordered by
    # descending count, which is the order the bars are drawn in.
    topic_counts_df = pd.DataFrame(
        Counter(all_topics).most_common(), columns=['Topic', 'Count']
    )

    # Plotting
    plt.close('all')  # close figures that are still open before creating a new one
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.set_facecolor('#222c52')
    fig.patch.set_facecolor('#222c52')

    # Alternate between the two accent colours along the bars.
    colors = ['#08F7FE' if i % 2 == 0 else '#FE53BB' for i in range(len(topic_counts_df))]
    topic_counts_df.plot(kind='bar', x='Topic', y='Count', ax=ax, color=colors, edgecolor=colors, alpha=0.7, linewidth=2, legend=None)

    # White text and ticks so they stay readable on the dark background.
    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.tick_params(axis='x', colors='white', labelsize=10, direction='out', length=6, width=2, rotation=45)
    ax.tick_params(axis='y', colors='white', labelsize=10, direction='out', length=6, width=2)
    ax.set_title('Topic Frequency Distribution', color='white', fontsize=16)
    ax.set_xlabel('Topic', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.grid(True, which='both', axis='y', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
    ax.set_axisbelow(True)  # draw the grid behind the bars

    for spine in ax.spines.values():
        spine.set_color('white')
        spine.set_linewidth(1)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    return fig
50
 
51
 
 
93
 
94
  def company_document_type(file_path, company_names):
95
  plt.close('all')
 
96
  if isinstance(company_names, str):
97
+ company_names = [name.strip() for name in company_names.split(',')]
98
 
99
  df = pd.read_excel(file_path)
100
+
101
  fig, ax = plt.subplots(figsize=(14, 8))
102
  ax.set_facecolor('#222c52')
103
  fig.patch.set_facecolor('#222c52')
104
 
105
+ colors = ['#08F7FE', '#FE53BB', '#fff236'] # Add more colors if necessary
106
 
107
  max_count = 0
108
  for index, company_name in enumerate(company_names):
 
120
  max_count = max(y_data)
121
 
122
  ax.set_xticks(range(len(all_document_types)))
123
+ ax.set_xticklabels(all_document_types, rotation=45, fontsize=12, fontweight='bold', color='white')
124
  ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
125
  ax.set_ylabel('Count', color='white')
126
+ ax.set_title('Document Types Contributed by Companies', color='white')
127
  ax.grid(True, which='both', axis='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.5)
128
  ax.set_axisbelow(True)
129
 
 
136
  ax.spines['right'].set_visible(False)
137
  ax.spines['top'].set_visible(False)
138
  ax.spines['left'].set_position(('data', 0))
139
+ plt.legend(facecolor='#222c52', edgecolor='white', fontsize=12, labelcolor='white')
140
 
141
  return fig
142
 
143
 
def get_expert(file_path):
    """Build a component update listing every expert found in an Excel file.

    Each cell of the 'Expert' column may name several experts separated by
    commas. The names are split, trimmed, de-duplicated and sorted.

    Args:
        file_path: Anything ``pd.read_excel`` accepts as its first argument.

    Returns:
        The result of ``gr.update(choices=...)`` with the sorted unique names.
        NOTE(review): ``gr`` is not imported in the visible part of this file;
        presumably ``import gradio as gr`` -- confirm at the top of the module.

    Raises:
        ValueError: If the file has no 'Expert' column.
    """
    # Load the Excel file
    df = pd.read_excel(file_path)

    if 'Expert' not in df.columns:
        raise ValueError("The 'Expert' column is missing from the provided file.")

    unique_experts = set()
    for cell in df['Expert'].dropna().unique():
        # str() guards against non-text cells, which have no .split().
        for expert in str(cell).split(','):
            expert = expert.strip()
            # Skip blanks produced by stray or trailing commas so the
            # choices never contain an empty entry.
            if expert:
                unique_experts.add(expert)

    return gr.update(choices=sorted(unique_experts))
160
 
161
  def chart_by_expert(file_path, expert_name):
162
  plt.close('all')
163
  # Load the Excel file
164
  data = pd.read_excel(file_path)
165
+
166
+ # Normalize the expert's name if it follows a specific format; otherwise, adjust accordingly
167
  parts = expert_name.split('/')
168
+ name = parts[1].strip() if len(parts) > 1 else expert_name.strip()
169
 
170
+ # Normalize function for companies, similar to the original code
171
+ def normalize_companies(company_list, merge_entities):
172
+ normalized = set()
173
+ for company in company_list:
174
+ normalized_name = merge_entities.get(company.strip(), company.strip())
175
+ normalized.add(normalized_name)
176
+ return list(normalized)
177
 
178
+ # Define merge entities mapping, as provided
179
  merge_entities = {
180
  "Nokia Shanghai Bell": "Nokia",
181
  "Qualcomm Korea": "Qualcomm",
182
+ # Add all other mappings as per the original code
183
+ # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  "Hugues Network Systems": "Hughes"
185
  }
186
 
187
+ # Adjust data processing to handle multiple experts and sources
188
+ # Flatten and normalize the source field across relevant rows
189
+ data['ExpertsList'] = data['Expert'].dropna().apply(lambda x: [expert.strip() for expert in x.split(',')])
190
+ data_exploded = data.explode('ExpertsList')
 
 
 
191
 
192
+ # Filter the data for the specified expert and handle multiple sources
193
+ filtered_data = data_exploded[data_exploded['ExpertsList'].str.contains(name, case=False, na=False)]
194
  sources = filtered_data['Source'].dropna()
195
  split_sources = sources.apply(lambda x: normalize_companies(x.split(', '), merge_entities))
 
 
196
  all_sources = [company for sublist in split_sources for company in sublist]
197
 
198
+ # Count occurrences and get the top 10
199
  source_counts = Counter(all_sources)
200
  top_10_sources = source_counts.most_common(10)
201
 
 
203
  top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
204
 
205
  # Plotting
 
206
  fig, ax = plt.subplots(figsize=(14, 11))
207
  ax.set_facecolor('#222c52')
208
  fig.patch.set_facecolor('#222c52')
 
216
  ax.yaxis.label.set_color('white')
217
  ax.tick_params(axis='x', colors='white', labelsize=12, direction='out', length=6, width=2, rotation=45)
218
  ax.tick_params(axis='y', colors='white', labelsize=12, direction='out', length=6, width=2)
219
+ ax.set_title(f"Top 10 Contributors for Expert '{expert_name}'", color='white', fontsize=16)
220
  ax.set_xlabel('Company', fontsize=14)
221
  ax.set_ylabel('Count', fontsize=14)
222
  ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
 
233
 
234
 
235
 
236
+
237
  # @title Top 10 des entreprises en termes de publications
238
 
239
 
 
295
  top_10_df = pd.DataFrame(top_10_sources, columns=['Company', 'Count'])
296
 
297
  # Plotting
 
298
  fig, ax = plt.subplots(figsize=(14, 12))
299
  ax.set_facecolor('#222c52')
300
  fig.patch.set_facecolor('#222c52')
 
322
 
323
  #plt.show()
324
  return fig
325
+