Sadjad Alikhani committed · commit 8406d02 · verified · 1 parent: 1484210

Update input_preprocess.py

Files changed (1):
  1. input_preprocess.py +291 -295
input_preprocess.py CHANGED
@@ -1,296 +1,292 @@
The previous revision matched the file below line for line, except for these
module-level lines after the imports, which this commit removed:

- import DeepMIMOv3
-
- vars_folder = 'variables/'
- os.makedirs(vars_folder, exist_ok=True)
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 13 16:13:29 2024

This script generates preprocessed data from wireless communication scenarios,
including token generation, patch creation, and data sampling for machine learning models.

@author: salikha4
"""

import numpy as np
import os
from tqdm import tqdm
import time
import pickle
import DeepMIMOv3  # still required by DeepMIMO_data_gen() and get_parameters() below

#%% Scenarios List
def scenarios_list():
    """Returns an array of available scenario names."""
    return np.array([
        'city_18_denver', 'city_15_indianapolis', 'city_19_oklahoma',
        'city_12_fortworth', 'city_11_santaclara', 'city_7_sandiego'
    ])

#%% Token Generation
def tokenizer(deepmimo_data, gen_raw=True):
    """
    Generates tokens by preparing and preprocessing the dataset.

    Args:
        deepmimo_data (list): DeepMIMO data dictionaries, one per scenario.
        gen_raw (bool): Whether to keep the masked tokens raw (no masking noise).
            Defaults to True.

    Returns:
        preprocessed_data (list): One [input_ids, masked_tokens, masked_pos]
            sample per user.
    """

    # Patch generation: stack the users of all scenarios along the first axis
    n_scenarios = len(deepmimo_data)
    patches = [patch_maker(deepmimo_data[scenario_idx]) for scenario_idx in range(n_scenarios)]
    patches = np.vstack(patches)

    # Define dimensions
    patch_size = patches.shape[2]
    n_patches = patches.shape[1]
    n_masks_half = int(0.15 * n_patches / 2)  # 15% of patches masked, split between real/imag halves
    sequence_length = n_patches + 1           # +1 for the [CLS] token (kept for reference)
    element_length = patch_size

    word2id = {'[CLS]': 0.2 * np.ones(patch_size), '[MASK]': 0.1 * np.ones(patch_size)}

    # Generate preprocessed channels
    preprocessed_data = []
    for user_idx in tqdm(range(len(patches)), desc="Processing items"):
        sample = make_sample(user_idx, patches, word2id, n_patches, n_masks_half, patch_size, gen_raw=gen_raw)
        preprocessed_data.append(sample)

    return preprocessed_data

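# Usage sketch (a hypothetical demo, not part of the original pipeline): drive the full
# preprocessing chain for one scenario. Assumes the DeepMIMOv3 ray-tracing files for
# that scenario are available under ./scenarios.
def _demo_tokenizer():
    scenario = scenarios_list()[0]                 # 'city_18_denver'
    deepmimo_data = [DeepMIMO_data_gen(scenario)]  # one data dict per scenario
    samples = tokenizer(deepmimo_data, gen_raw=True)
    input_ids, masked_tokens, masked_pos = samples[0]
    print(input_ids.shape)                         # (n_patches + 1, patch_size)
    print(len(masked_tokens), masked_pos.shape)    # 2 * n_masks_half masked patches
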
#%% Patch Creation
def patch_maker(data, patch_size=16, norm_factor=1e6):
    """
    Creates patches from one scenario's channel data.

    Args:
        data (dict): DeepMIMO data for a single scenario.
        patch_size (int): Size of each patch.
        norm_factor (float): Normalization factor for channels.

    Returns:
        patch (numpy array): Generated patches of shape (n_users, n_patches, patch_size).
    """
    idxs = np.where(data['user']['LoS'] != -1)[0]  # keep only users with a valid link

    # Reshaping and normalizing channels
    original_ch = data['user']['channel'][idxs]
    flat_channels = original_ch.reshape((original_ch.shape[0], -1)).astype(np.csingle)
    flat_channels_complex = np.hstack((flat_channels.real, flat_channels.imag)) * norm_factor

    # Create patches
    n_patches = flat_channels_complex.shape[1] // patch_size
    patch = np.zeros((len(idxs), n_patches, patch_size))
    for idx in range(n_patches):
        patch[:, idx, :] = flat_channels_complex[:, idx * patch_size:(idx + 1) * patch_size]

    return patch

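# Shape sanity check (a hypothetical demo with synthetic data). With 32 BS antennas,
# 1 UE antenna and 32 subcarriers, each channel has 32 * 32 = 1024 complex entries,
# i.e. 2048 real values, which split into 128 patches of size 16 (real parts in the
# first half, imaginary parts in the second).
def _demo_patch_maker():
    n_users = 4
    fake_data = {'user': {
        'LoS': np.zeros(n_users),  # all users valid (LoS != -1)
        'channel': (np.random.randn(n_users, 1, 32, 32)
                    + 1j * np.random.randn(n_users, 1, 32, 32)) * 1e-6,
    }}
    patch = patch_maker(fake_data)
    print(patch.shape)  # (4, 128, 16)
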
#%% Data Generation for Scenario Areas
def DeepMIMO_data_gen(scenario):
    """
    Generates DeepMIMO data for a given scenario.

    Args:
        scenario (str): Scenario name.

    Returns:
        data (dict): Generated data for the active basestation.
    """

    parameters, row_column_users, n_ant_bs, n_ant_ue, n_subcarriers = get_parameters(scenario)

    deepMIMO_dataset = DeepMIMOv3.generate_data(parameters)
    uniform_idxs = uniform_sampling(deepMIMO_dataset, [1, 1], len(parameters['user_rows']),
                                    users_per_row=row_column_users[scenario]['n_per_row'])
    data = select_by_idx(deepMIMO_dataset, uniform_idxs)[0]

    return data

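# Usage sketch (a hypothetical demo): generate one scenario and cache it with the
# pickle helpers below. The channel array layout shown is DeepMIMOv3's convention
# as I understand it (users, UE antennas, BS antennas, subcarriers).
def _demo_data_gen():
    data = DeepMIMO_data_gen('city_18_denver')
    print(data['user']['channel'].shape)     # e.g. (n_users, 1, 32, 32)
    save_var(data, './city_18_denver_data')  # written to ./city_18_denver_data.pickle
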
#%%%
def get_parameters(scenario):

    n_ant_bs = 32
    n_ant_ue = 1
    n_subcarriers = 32
    scs = 30e3  # subcarrier spacing (Hz)

    row_column_users = {
        'city_18_denver': {'n_rows': 85, 'n_per_row': 82},
        'city_15_indianapolis': {'n_rows': 80, 'n_per_row': 79},
        'city_19_oklahoma': {'n_rows': 82, 'n_per_row': 75},
        'city_12_fortworth': {'n_rows': 86, 'n_per_row': 72},
        'city_11_santaclara': {'n_rows': 47, 'n_per_row': 114},
        'city_7_sandiego': {'n_rows': 71, 'n_per_row': 83}
    }

    parameters = DeepMIMOv3.default_params()
    parameters['dataset_folder'] = './scenarios'
    parameters['scenario'] = scenario

    # The 'O1_3p5' and 'Boston5G_3p5' branches are kept from a broader scenario set;
    # those names are not in row_column_users above.
    if scenario == 'O1_3p5':
        parameters['active_BS'] = np.array([4])
    elif scenario in ['city_18_denver', 'city_15_indianapolis']:
        parameters['active_BS'] = np.array([3])
    else:
        parameters['active_BS'] = np.array([1])

    # 'Boston5G_3p5' stores a (start, end) pair for 'n_rows'; the city scenarios
    # store a single row count.
    if scenario == 'Boston5G_3p5':
        parameters['user_rows'] = np.arange(row_column_users[scenario]['n_rows'][0],
                                            row_column_users[scenario]['n_rows'][1])
    else:
        parameters['user_rows'] = np.arange(row_column_users[scenario]['n_rows'])

    parameters['bs_antenna']['shape'] = np.array([n_ant_bs, 1])   # Horizontal, Vertical
    parameters['bs_antenna']['rotation'] = np.array([0, 0, -135]) # (x, y, z)
    parameters['ue_antenna']['shape'] = np.array([n_ant_ue, 1])
    parameters['enable_BS2BS'] = False
    parameters['OFDM']['subcarriers'] = n_subcarriers
    parameters['OFDM']['selected_subcarriers'] = np.arange(n_subcarriers)

    parameters['OFDM']['bandwidth'] = scs * n_subcarriers / 1e9  # in GHz: 30 kHz * 32 = 0.96 MHz
    parameters['num_paths'] = 20

    return parameters, row_column_users, n_ant_bs, n_ant_ue, n_subcarriers

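# Quick arithmetic check (a hypothetical demo): 32 subcarriers at 30 kHz spacing span
# 0.96 MHz, i.e. 0.00096 in the GHz units used above. Assumes DeepMIMOv3 is installed.
def _demo_parameters():
    parameters, _, n_ant_bs, n_ant_ue, n_subcarriers = get_parameters('city_18_denver')
    print(parameters['OFDM']['bandwidth'])  # 0.00096
    print(parameters['active_BS'])          # [3] for city_18_denver
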
#%% Sample Generation
def make_sample(user_idx, patch, word2id, n_patches, n_masks, patch_size, gen_raw=False):
    """
    Generates a sample for one user, including masking and tokenizing.

    Args:
        user_idx (int): Index of the user.
        patch (numpy array): Patches data.
        word2id (dict): Dictionary for special tokens.
        n_patches (int): Number of patches.
        n_masks (int): Number of masked positions per half (real/imaginary).
        patch_size (int): Size of each patch.
        gen_raw (bool): Whether to keep the masked tokens raw (no masking noise).

    Returns:
        sample (list): [input_ids, masked_tokens, masked_pos] for the user.
    """

    tokens = patch[user_idx]
    input_ids = np.vstack((word2id['[CLS]'], tokens))

    # The first half of the patches holds real parts, the second half imaginary parts;
    # mask the same positions in both halves.
    real_tokens_size = int(n_patches / 2)
    masks_pos_real = np.random.choice(range(0, real_tokens_size), size=n_masks, replace=False)
    masks_pos_imag = masks_pos_real + real_tokens_size
    masked_pos = np.hstack((masks_pos_real, masks_pos_imag)) + 1  # +1 skips the [CLS] row

    masked_tokens = []
    for pos in masked_pos:
        original_masked_tokens = input_ids[pos].copy()
        masked_tokens.append(original_masked_tokens)
        if not gen_raw:
            # BERT-style corruption: 10% random patch, 80% [MASK], 10% left unchanged
            rnd_num = np.random.rand()
            if rnd_num < 0.1:
                input_ids[pos] = np.random.rand(patch_size)
            elif rnd_num < 0.9:
                input_ids[pos] = word2id['[MASK]']

    return [input_ids, masked_tokens, masked_pos]

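# Worked example (a hypothetical demo). For the default setup (128 patches of size 16),
# tokenizer() masks int(0.15 * 128 / 2) = 9 positions per half, i.e. 18 in total.
def _demo_make_sample():
    n_patches, patch_size = 128, 16
    patches = np.random.rand(1, n_patches, patch_size)
    word2id = {'[CLS]': 0.2 * np.ones(patch_size), '[MASK]': 0.1 * np.ones(patch_size)}
    input_ids, masked_tokens, masked_pos = make_sample(0, patches, word2id,
                                                       n_patches, 9, patch_size)
    print(input_ids.shape, len(masked_tokens), masked_pos.shape)  # (129, 16) 18 (18,)
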
#%% Sampling and Data Selection
def uniform_sampling(dataset, sampling_div, n_rows, users_per_row):
    """
    Performs uniform sampling over the user grid.

    Args:
        dataset (dict): DeepMIMO dataset (unused; kept for interface compatibility).
        sampling_div (list): Step sizes along the [x, y] dimensions.
        n_rows (int): Number of rows for user selection.
        users_per_row (int): Number of users per row.

    Returns:
        uniform_idxs (numpy array): Indices of the selected samples.
    """
    cols = np.arange(users_per_row, step=sampling_div[0])
    rows = np.arange(n_rows, step=sampling_div[1])
    uniform_idxs = np.array([j + i * users_per_row for i in rows for j in cols])

    return uniform_idxs

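# Tiny worked example (a hypothetical demo): a 3-row grid with 4 users per row,
# keeping every second user in each direction.
def _demo_uniform_sampling():
    idxs = uniform_sampling(None, [2, 2], n_rows=3, users_per_row=4)
    print(idxs)  # [ 0  2  8 10]
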
def select_by_idx(dataset, idxs):
    """
    Selects a subset of the dataset based on the provided indices.

    Args:
        dataset (list): Per-basestation dataset to trim.
        idxs (numpy array): Indices of users to select.

    Returns:
        dataset_t (list): Trimmed dataset based on selected indices.
    """
    dataset_t = []  # Trimmed dataset
    for bs_idx in range(len(dataset)):
        dataset_t.append({})
        dataset_t[bs_idx]['location'] = dataset[bs_idx]['location']
        dataset_t[bs_idx]['user'] = {k: dataset[bs_idx]['user'][k][idxs] for k in dataset[bs_idx]['user']}

    return dataset_t

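# Minimal demo (hypothetical) with a synthetic one-basestation, two-user dataset.
def _demo_select_by_idx():
    fake = [{'location': np.zeros(3),
             'user': {'channel': np.random.rand(2, 1, 32, 32), 'LoS': np.array([1, -1])}}]
    trimmed = select_by_idx(fake, np.array([0]))
    print(trimmed[0]['user']['channel'].shape)  # (1, 1, 32, 32)
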
#%% Save and Load Utilities
def save_var(var, path):
    """
    Saves a variable to a pickle file.

    Args:
        var (object): Variable to be saved.
        path (str): Path to save the file; '.pickle' is appended unless the
            path already ends in '.p'.

    Returns:
        None
    """
    path_full = path if path.endswith('.p') else (path + '.pickle')
    with open(path_full, 'wb') as handle:
        pickle.dump(var, handle)

def load_var(path):
    """
    Loads a variable from a pickle file.

    Args:
        path (str): Path of the file to load; resolved with the same suffix
            rule as save_var.

    Returns:
        var (object): Loaded variable.
    """
    path_full = path if path.endswith('.p') else (path + '.pickle')
    with open(path_full, 'rb') as handle:
        var = pickle.load(handle)

    return var

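# Round-trip demo (hypothetical): both calls resolve to 'tokens.pickle'.
def _demo_save_load():
    save_var({'a': np.arange(3)}, 'tokens')
    restored = load_var('tokens')
    print(restored['a'])  # [0 1 2]
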
 
 
 
 
#%%