nancyH committed
Commit b46126b · verified · 1 Parent(s): 06f4eae

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ chr1_dna.txt filter=lfs diff=lfs merge=lfs -text
check_vocab.py ADDED
@@ -0,0 +1,94 @@
+ import torch
+ import torch.nn as nn
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import torch.nn.functional as F
+
+ # =====================
+ # MODEL: SPARSE AUTOENCODER (same architecture as run_train.py)
+ # =====================
+ L = 50                 # window length; must be defined before INPUT_DIM
+ INPUT_DIM = L * 5      # 4 DNA channels + 1 phyloP channel per position
+ LATENT_DIM = 2048
+ HIDDEN_DIM = 1024
+
+ class SparseAE(nn.Module):
+     def __init__(self, input_dim=INPUT_DIM, latent_dim=LATENT_DIM, hidden_dim=HIDDEN_DIM):
+         super().__init__()
+
+         # Encoder
+         self.encoder = nn.Sequential(
+             nn.Linear(input_dim, hidden_dim),
+             nn.ReLU(),
+             nn.Linear(hidden_dim, latent_dim),
+             nn.ReLU()  # ReLU helps sparsity with L1
+         )
+
+         # Shared decoder layer
+         self.dec_hidden = nn.Linear(latent_dim, hidden_dim)
+
+         # Decoder heads
+         self.dec_dna = nn.Linear(hidden_dim, L * 4)
+         self.dec_phy = nn.Linear(hidden_dim, L * 1)
+
+     def forward(self, dna, phy):
+         B = dna.size(0)
+
+         x = torch.cat(
+             [dna.reshape(B, -1), phy.reshape(B, -1)],
+             dim=1
+         )  # (B, INPUT_DIM)
+
+         h = self.encoder(x)
+         dec = F.relu(self.dec_hidden(h))
+
+         recon_dna = self.dec_dna(dec).reshape(B, L, 4)           # (B, L, 4)
+         recon_phy = torch.tanh(self.dec_phy(dec)).reshape(B, L)  # (B, L)
+
+         return recon_dna, recon_phy, h
+
+ # Setup
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model = SparseAE().to(device)
+
+ # Load the final checkpoint
+ model.load_state_dict(torch.load("sparse_ae_50bp_epoch3.pt", map_location=device))
+ model.eval()
+ print("Model loaded.")
+
+ # --- GENERATE FAKE DATA (or load real data if you prefer) ---
+ print("Generating test data...")
+ # Random DNA with a uniform base distribution (approximates genomic background)
+ N_SAMPLES = 10000
+ probs = torch.tensor([0.25, 0.25, 0.25, 0.25])  # A, C, G, T
+ test_dna_idx = torch.multinomial(probs, N_SAMPLES * L, replacement=True).view(N_SAMPLES, L)
+ test_dna = F.one_hot(test_dna_idx, num_classes=4).float().to(device)
+ test_phy = torch.randn(N_SAMPLES, L).to(device)
+
+ # --- RUN INFERENCE ---
+ print("Running inference...")
+ with torch.no_grad():
+     # Run the encoder to get the latent 'h'.
+     # Note: if you used Top-K during training, apply it here too;
+     # with standard L1/KL sparsity, 'h' straight from the encoder is enough.
+     B = test_dna.size(0)
+     x = torch.cat([test_dna.reshape(B, -1), test_phy.reshape(B, -1)], dim=1)
+     h = model.encoder(x)
+
+ # --- ANALYZE VOCABULARY ---
+ h_np = h.cpu().numpy()
+
+ # 1. How often is each token used? A neuron counts as "firing" if h > 0.1.
+ neuron_firing_counts = np.sum(h_np > 0.1, axis=0)  # shape (2048,)
+
+ # 2. Sort counts in descending order
+ sorted_counts = np.sort(neuron_firing_counts)[::-1]
+
+ print("\n--- VOCABULARY HEALTH CHECK ---")
+ print(f"Total Neurons: {LATENT_DIM}")
+ print(f"Dead Neurons (never fire): {np.sum(neuron_firing_counts == 0)}")
+ print(f"Rare Neurons (fire < 10 times): {np.sum(neuron_firing_counts < 10)}")
+ print(f"Common Neurons (fire > 1000 times): {np.sum(neuron_firing_counts > 1000)}")
chr1_dna.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74c534c06853bf3f631c627e6b026bd2a29cade1926c19eda6fa03e462f86f02
+ size 248932432
chr1_phyloP_norm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36f76db186a5031c277f406f4df0bf24fae1d5db15b715daa0303b6cbcebeb06
+ size 995729856
consensus_string.py ADDED
@@ -0,0 +1,35 @@
+ import numpy as np
+ import glob
+
+ # Helper to map index to letter
+ idx_to_base = ['A', 'C', 'G', 'T']
+
+ # Find the per-token PWM files
+ pwm_files = glob.glob("token*_pwm.npy")
+ pwm_files.sort()
+
+ print(f"{'Token ID':<10} | {'Consensus Sequence (50bp)':<55}")
+ print("-" * 70)
+
+ for pwm_file in pwm_files:
+     # Extract the token ID from the filename
+     tid = pwm_file.split("token")[1].split("_")[0]
+
+     # Load the PWM matrix, shape (50, 4)
+     pwm = np.load(pwm_file)
+
+     # Generate the consensus string
+     consensus = []
+     for row in pwm:
+         # row is [prob_A, prob_C, prob_G, prob_T]
+         max_idx = np.argmax(row)
+         max_val = row[max_idx]
+
+         # If the top probability is at or below background level (< 0.25),
+         # treat the position as noise
+         if max_val < 0.25:
+             consensus.append(".")  # low confidence
+         else:
+             consensus.append(idx_to_base[max_idx])
+
+     seq_str = "".join(consensus)
+     print(f"{tid:<10} | {seq_str}")
extract.py ADDED
@@ -0,0 +1,286 @@
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torch.utils.data import Dataset, DataLoader
+ from tqdm import tqdm
+
+ # =====================
+ # 1. SETUP & DATA LOADING
+ # =====================
+ print("Loading sequence / phyloP data...")
+
+ dna_path = "/home/n5huang/dna_token/SparseAE/chr1_dna.txt"
+ phy_path = "/home/n5huang/dna_token/SparseAE/chr1_phyloP_norm.npy"
+
+ with open(dna_path) as f:
+     sequence = f.read().strip()
+
+ phy_norm = np.load(phy_path)
+
+ assert len(sequence) == len(phy_norm), "DNA and phyloP length mismatch!"
+ chrom_len = len(sequence)
+ print(f"Chromosome 1 length: {chrom_len:,} bp")
+
+ # =====================
+ # 2. DNA ENCODING (HANDLE 'N')
+ # =====================
+ print("Encoding DNA to one-hot (with N handling)...")
+
+ mapping = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
+
+ # Map bases to ints, using 4 as "N/unknown"
+ dna_int = np.fromiter((mapping.get(b, 4) for b in sequence), dtype=np.int8)
+ num_N = np.sum(dna_int == 4)
+ print(f"Number of N bases: {num_N:,}")
+
+ # One-hot over 5 classes (A, C, G, T, N), then drop the N column,
+ # so N positions become the all-zero vector [0, 0, 0, 0]
+ temp_onehot = np.eye(5, dtype=np.float32)[dna_int]
+ dna_onehot = temp_onehot[:, :4]  # shape (chrom_len, 4)
+
+ # =====================
+ # 3. PHYLOP CHECK + COMBINE
+ # =====================
+ print("Preparing combined tensor...")
+
+ # Assume phy_norm is already in [-1, 1]; warn if not.
+ max_abs_phy = np.max(np.abs(phy_norm))
+ if max_abs_phy > 1.1:
+     print(f"WARNING: phy_norm max abs={max_abs_phy:.3f} > 1.1; "
+           f"data may not be normalized as expected.")
+
+ phy_norm = phy_norm.astype(np.float32)
+ phy_col = phy_norm.reshape(-1, 1)  # (chrom_len, 1)
+
+ combined_np = np.concatenate([dna_onehot, phy_col], axis=1)  # (chrom_len, 5)
+ combined_tensor = torch.from_numpy(combined_np)  # CPU tensor
+
+ print(f"Master tensor shape: {combined_tensor.shape}")
+
+ # =====================
+ # 4. DATASET: CHUNKED WINDOWING
+ # =====================
+ L = 50
+
+ class ChunkedChr1Dataset(Dataset):
+     def __init__(self, combined, L=50):
+         self.combined = combined
+         self.L = L
+         self.N = combined.shape[0] - L  # number of valid start positions
+
+     def __len__(self):
+         return self.N
+
+     def __getitem__(self, idx):
+         # window: (L, 5)
+         window = self.combined[idx : idx + self.L]
+         dna = window[:, :4]  # (L, 4)
+         phy = window[:, 4]   # (L,)
+         return dna, phy, idx
+
+ dataset = ChunkedChr1Dataset(combined_tensor, L=L)
+ print(f"Dataset length (#windows): {len(dataset):,}")
+
+ # =====================
+ # 5. SEQUENTIAL DATALOADER
+ # =====================
+ BATCH_SIZE = 1024
+
+ # Unlike training, extraction must visit every window exactly once, in genome
+ # order, so no random sampler here: shuffle=False keeps batches aligned with
+ # genome positions, and drop_last=False processes every last window.
+ loader = DataLoader(
+     dataset,
+     batch_size=BATCH_SIZE,
+     shuffle=False,
+     drop_last=False,
+     num_workers=0,   # safer on a large in-memory dataset
+     pin_memory=True
+ )
+
+ print("DataLoader ready.")
+
+ # =====================
+ # 6. MODEL: SPARSE AUTOENCODER
+ # =====================
+ INPUT_DIM = L * 5  # 4 DNA + 1 phyloP
+ LATENT_DIM = 2048
+ HIDDEN_DIM = 1024
+
+ class SparseAE(nn.Module):
+     def __init__(self, input_dim=INPUT_DIM, latent_dim=LATENT_DIM, hidden_dim=HIDDEN_DIM):
+         super().__init__()
+
+         # Encoder
+         self.encoder = nn.Sequential(
+             nn.Linear(input_dim, hidden_dim),
+             nn.ReLU(),
+             nn.Linear(hidden_dim, latent_dim),
+             nn.ReLU()  # ReLU helps sparsity with L1
+         )
+
+         # Shared decoder layer
+         self.dec_hidden = nn.Linear(latent_dim, hidden_dim)
+
+         # Decoder heads
+         self.dec_dna = nn.Linear(hidden_dim, L * 4)
+         self.dec_phy = nn.Linear(hidden_dim, L * 1)
+
+     def forward(self, dna, phy):
+         B = dna.size(0)
+
+         x = torch.cat(
+             [dna.reshape(B, -1), phy.reshape(B, -1)],
+             dim=1
+         )  # (B, INPUT_DIM)
+
+         h = self.encoder(x)
+         dec = F.relu(self.dec_hidden(h))
+
+         recon_dna = self.dec_dna(dec).reshape(B, L, 4)           # (B, L, 4)
+         recon_phy = torch.tanh(self.dec_phy(dec)).reshape(B, L)  # (B, L)
+
+         return recon_dna, recon_phy, h
+
+ ########################################
+ # 7. LOAD CHECKPOINT
+ ########################################
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Using device: {device}")
+
+ model = SparseAE().to(device)
+ model.load_state_dict(torch.load("sparse_ae_50bp_epoch3.pt", map_location=device))
+ model.eval()
+ print("Model loaded.")
+
+ ########################################
+ # 8. TOKEN EXTRACTION
+ ########################################
+
+ print("Extracting tokens...")
+
+ all_token_ids = np.zeros(len(dataset), dtype=np.int32)
+ # h_values = np.zeros((len(dataset), LATENT_DIM), dtype=np.float32)  # far too large to keep
+
+ with torch.no_grad():
+     for dna_batch, phy_batch, idx_batch in tqdm(loader):
+         dna_batch = dna_batch.to(device).float()
+         phy_batch = phy_batch.to(device).float()
+
+         _, _, h = model(dna_batch, phy_batch)
+         h_cpu = h.cpu().numpy()
+
+         # argmax token per window
+         token_ids = np.argmax(h_cpu, axis=1)
+
+         # Write by window index so each token stays aligned with its
+         # genome start position
+         all_token_ids[idx_batch.numpy()] = token_ids
+         # h_values[idx_batch.numpy()] = h_cpu
+
+ print("Token extraction complete.")
+
+ np.save("token_ids.npy", all_token_ids)
+ # np.save("latent_h.npy", h_values)
+
+ # Histogram
+ hist = np.bincount(all_token_ids, minlength=LATENT_DIM)
+ np.save("token_hist.npy", hist)
+
+ print("Top tokens:")
+ top_tokens = np.argsort(hist)[::-1][:20]  # most frequent first
+
+ for t in top_tokens:
+     print(f"Token {t}: count={hist[t]}")
+
+ ########################################
+ # 9. MOTIF SUMMARY FOR TOP TOKENS
+ ########################################
+
+ print("\nBuilding PWM + average PhyloP for top tokens...")
+ # Initialize accumulators for ALL tokens
+ pwm_sum = {t: np.zeros((L, 4), dtype=np.float32) for t in range(LATENT_DIM)}
+ phy_sum = {t: np.zeros(L, dtype=np.float32) for t in range(LATENT_DIM)}
+ counts = {t: 0 for t in range(LATENT_DIM)}
+
+ print("Accumulating statistics (this may take 15-30 mins)...")
+
+ limit = len(all_token_ids) - L
+
+ for i in tqdm(range(limit)):
+     t = all_token_ids[i]
+
+     # Always accumulate the window behind each token
+     window = combined_np[i : i + L]
+
+     pwm_sum[t] += window[:, :4]
+     phy_sum[t] += window[:, 4]
+     counts[t] += 1
+
+ ########################################
+ # 9A. SAVE PER-TOKEN PWMs & PHYLO PROFILES
+ ########################################
+
+ print("Saving profiles...")
+
+ for t in range(LATENT_DIM):
+     if counts[t] == 0:
+         continue
+
+     pwm = pwm_sum[t] / counts[t]
+     avg_phy = phy_sum[t] / counts[t]
+
+     np.save(f"token{t}_pwm.npy", pwm)
+     np.save(f"token{t}_phy.npy", avg_phy)
+
+ ########################################
+ # 9B. RANK TOKENS BY PHYLOP AND RARITY
+ ########################################
+
+ avg_phylop_per_token = np.zeros(LATENT_DIM)
+ count_per_token = np.zeros(LATENT_DIM)
+
+ for t in range(LATENT_DIM):
+     if counts[t] > 0:
+         avg_phylop_per_token[t] = (phy_sum[t] / counts[t]).mean()
+         count_per_token[t] = counts[t]
+     else:
+         avg_phylop_per_token[t] = -999  # sentinel so unused tokens rank last
+         count_per_token[t] = 0
+
+ # Rank by PhyloP (high to low)
+ tokens_by_phylop = np.argsort(avg_phylop_per_token)[::-1]
+ top_phy_tokens = tokens_by_phylop[:20]
+
+ # Rank by rarity (low to high); note unused tokens (count 0) sort first
+ rare_tokens = np.argsort(count_per_token)[:20]
+
+ print("Top 20 conserved tokens:", top_phy_tokens)
+ print("Top 20 rarest tokens:", rare_tokens)
+
+ print("\n=== Extraction Completed Successfully ===")
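Because extraction assigns one token per window start position, token_ids.npy doubles as a genome-coordinate index: entry i is the token of the 50bp window starting at chr1 position i (0-based). A hypothetical downstream lookup, using token 1248 (one of the PWMs uploaded in this commit) as the example:

import numpy as np

token_ids = np.load("token_ids.npy")   # token_ids[i] = token of window at chr1:i
t = 1248                               # example token id (assumption: any saved token works)
starts = np.flatnonzero(token_ids == t)
print(f"Token {t}: {len(starts):,} windows; first starts (chr1, 0-based): {starts[:5]}")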
extract_log.txt ADDED
The diff for this file is too large to render.
 
run_job.sh ADDED
@@ -0,0 +1,21 @@
+ #!/bin/bash
+
+ # --- 0. Activate Conda environment ---
+ echo "Activating Conda environment..."
+ source /home/n5huang/miniconda3/etc/profile.d/conda.sh
+ conda activate dnabert_v2
+
+ # --- 1. Configuration ---
+ # Pin the job to a single GPU
+ export CUDA_VISIBLE_DEVICES=6
+ echo "Assigned GPU: $CUDA_VISIBLE_DEVICES"
+
+ # --- 2. Working directory ---
+ # Move to the folder where the script lives so any relative paths resolve;
+ # an absolute path is used just to be safe
+ cd /home/n5huang/dna_token/SparseAE/
+
+ # --- 3. Run the Python script ---
+ # -u ensures logs are flushed immediately
+ echo "Starting Python job..."
+ python -u extract.py  # swap in run_train.py to train instead
run_train.py ADDED
@@ -0,0 +1,274 @@
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torch.utils.data import Dataset, DataLoader, RandomSampler
+
+ # =====================
+ # 1. SETUP & DATA LOADING
+ # =====================
+ print("Loading sequence / phyloP data...")
+
+ dna_path = "/home/n5huang/dna_token/SparseAE/chr1_dna.txt"
+ phy_path = "/home/n5huang/dna_token/SparseAE/chr1_phyloP_norm.npy"
+
+ with open(dna_path) as f:
+     sequence = f.read().strip()
+
+ phy_norm = np.load(phy_path)
+
+ assert len(sequence) == len(phy_norm), "DNA and phyloP length mismatch!"
+ chrom_len = len(sequence)
+ print(f"Chromosome 1 length: {chrom_len:,} bp")
+
+ # =====================
+ # 2. DNA ENCODING (HANDLE 'N')
+ # =====================
+ print("Encoding DNA to one-hot (with N handling)...")
+
+ mapping = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
+
+ # Map bases to ints, using 4 as "N/unknown"
+ dna_int = np.fromiter((mapping.get(b, 4) for b in sequence), dtype=np.int8)
+ num_N = np.sum(dna_int == 4)
+ print(f"Number of N bases: {num_N:,}")
+
+ # One-hot over 5 classes (A, C, G, T, N), then drop the N column,
+ # so N positions become the all-zero vector [0, 0, 0, 0]
+ temp_onehot = np.eye(5, dtype=np.float32)[dna_int]
+ dna_onehot = temp_onehot[:, :4]  # shape (chrom_len, 4)
+
+ # =====================
+ # 3. PHYLOP CHECK + COMBINE
+ # =====================
+ print("Preparing combined tensor...")
+
+ # Assume phy_norm is already in [-1, 1]; warn if not.
+ max_abs_phy = np.max(np.abs(phy_norm))
+ if max_abs_phy > 1.1:
+     print(f"WARNING: phy_norm max abs={max_abs_phy:.3f} > 1.1; "
+           f"data may not be normalized as expected.")
+
+ phy_norm = phy_norm.astype(np.float32)
+ phy_col = phy_norm.reshape(-1, 1)  # (chrom_len, 1)
+
+ combined_np = np.concatenate([dna_onehot, phy_col], axis=1)  # (chrom_len, 5)
+ combined_tensor = torch.from_numpy(combined_np)  # CPU tensor
+
+ print(f"Master tensor shape: {combined_tensor.shape}")
+
+ # =====================
+ # 4. DATASET: CHUNKED WINDOWING
+ # =====================
+ L = 50
+
+ class ChunkedChr1Dataset(Dataset):
+     def __init__(self, combined, L=50):
+         self.combined = combined
+         self.L = L
+         self.N = combined.shape[0] - L  # number of valid start positions
+
+     def __len__(self):
+         return self.N
+
+     def __getitem__(self, idx):
+         # window: (L, 5)
+         window = self.combined[idx : idx + self.L]
+         dna = window[:, :4]  # (L, 4)
+         phy = window[:, 4]   # (L,)
+         return dna, phy
+
+ dataset = ChunkedChr1Dataset(combined_tensor, L=L)
+ print(f"Dataset length (#windows): {len(dataset):,}")
+
+ # =====================
+ # 5. DATALOADER WITH RANDOM SAMPLER
+ # =====================
+ BATCH_SIZE = 1024
+ SAMPLES_PER_EPOCH = 5_000_000  # number of windows per epoch (tunable)
+
+ sampler = RandomSampler(
+     dataset,
+     replacement=True,
+     num_samples=SAMPLES_PER_EPOCH
+ )
+
+ loader = DataLoader(
+     dataset,
+     batch_size=BATCH_SIZE,
+     sampler=sampler,
+     drop_last=True,
+     num_workers=0,  # safer on a large in-memory dataset
+     pin_memory=True
+ )
+
+ print("DataLoader ready.")
+
+ # =====================
+ # 6. MODEL: SPARSE AUTOENCODER
+ # =====================
+ INPUT_DIM = L * 5  # 4 DNA + 1 phyloP
+ LATENT_DIM = 2048
+ HIDDEN_DIM = 1024
+
+ class SparseAE(nn.Module):
+     def __init__(self, input_dim=INPUT_DIM, latent_dim=LATENT_DIM, hidden_dim=HIDDEN_DIM):
+         super().__init__()
+
+         # Encoder
+         self.encoder = nn.Sequential(
+             nn.Linear(input_dim, hidden_dim),
+             nn.ReLU(),
+             nn.Linear(hidden_dim, latent_dim),
+             nn.ReLU()  # ReLU helps sparsity with L1
+         )
+
+         # Shared decoder layer
+         self.dec_hidden = nn.Linear(latent_dim, hidden_dim)
+
+         # Decoder heads
+         self.dec_dna = nn.Linear(hidden_dim, L * 4)
+         self.dec_phy = nn.Linear(hidden_dim, L * 1)
+
+     def forward(self, dna, phy):
+         B = dna.size(0)
+
+         x = torch.cat(
+             [dna.reshape(B, -1), phy.reshape(B, -1)],
+             dim=1
+         )  # (B, INPUT_DIM)
+
+         h = self.encoder(x)
+         dec = F.relu(self.dec_hidden(h))
+
+         recon_dna = self.dec_dna(dec).reshape(B, L, 4)           # (B, L, 4)
+         recon_phy = torch.tanh(self.dec_phy(dec)).reshape(B, L)  # (B, L)
+
+         return recon_dna, recon_phy, h
+
+ # =====================
+ # 7. TRAINING LOOP
+ # =====================
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Training on device: {device}")
+
+ model = SparseAE().to(device)
+ optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+
+ # lambda_l1 = 0.01  # fixed alternative; replaced by the annealing schedule below
+ lambda_l1_start = 0.02
+ lambda_l1_end = 0.005
+ phy_weight = 10.0
+ num_epochs = 5
+ PRINT_EVERY = 1000  # batches
+ beta_kl_schedule = [0.0, 0.01, 0.02, 0.05, 0.1]  # KL weight per epoch
+
+ print("Starting training...")
+
+ for epoch in range(num_epochs):
+     model.train()
+     total_loss = 0.0
+     total_dna = 0.0
+     total_phy = 0.0
+     total_active = 0.0
+
+     batch_count = 0
+     for dna_batch, phy_batch in loader:
+         batch_count += 1
+
+         dna_batch = dna_batch.to(device, non_blocking=True).float()  # (B, L, 4)
+         phy_batch = phy_batch.to(device, non_blocking=True).float()  # (B, L)
+
+         optimizer.zero_grad()
+
+         recon_dna, recon_phy, h = model(dna_batch, phy_batch)
+
+         # Mask positions that are 'N' (all-zero one-hot)
+         mask = dna_batch.sum(dim=-1) > 0  # (B, L), True where valid base
+
+         # --- DNA loss (masked CE) ---
+         true_dna_cls = dna_batch.argmax(dim=-1)  # (B, L)
+         dna_logits = recon_dna.permute(0, 2, 1)  # (B, 4, L)
+         loss_dna_raw = F.cross_entropy(dna_logits, true_dna_cls, reduction='none')  # (B, L)
+
+         if mask.sum() > 0:
+             loss_dna = (loss_dna_raw * mask).sum() / mask.sum()
+         else:
+             loss_dna = torch.tensor(0.0, device=device)
+
+         # --- PhyloP loss (masked MSE) ---
+         loss_phy_raw = F.mse_loss(recon_phy, phy_batch, reduction='none')  # (B, L)
+
+         if mask.sum() > 0:
+             loss_phy = (loss_phy_raw * mask).sum() / mask.sum()
+         else:
+             loss_phy = torch.tensor(0.0, device=device)
+
+         # --- KL sparsity penalty ---
+         rho = 0.02  # target sparsity
+         eps = 1e-12
+         rho_hat = torch.mean(h, dim=0)
+         rho_hat = torch.clamp(rho_hat, min=1e-6, max=1 - 1e-6)
+         kl_per_unit = (
+             rho * torch.log((rho + eps) / (rho_hat + eps)) +
+             (1 - rho) * torch.log(((1 - rho) + eps) / ((1 - rho_hat) + eps))
+         )
+         beta_kl = beta_kl_schedule[min(epoch, len(beta_kl_schedule) - 1)]
+         # loss_kl = 1 * kl_per_unit.sum()  # fixed β = 1 alternative
+         loss_kl = beta_kl * kl_per_unit.sum()
+
+         # Linearly anneal the L1 weight from start to end over the epochs
+         lambda_l1 = (
+             lambda_l1_start
+             + (lambda_l1_end - lambda_l1_start) * (epoch / (num_epochs - 1))
+         )
+
+         # --- L1 sparsity on latent ---
+         loss_l1 = lambda_l1 * torch.mean(torch.abs(h))
+
+         # Total loss
+         loss = loss_dna + phy_weight * loss_phy + loss_l1 + loss_kl
+
+         loss.backward()
+         optimizer.step()
+
+         # Logging accumulators
+         B = dna_batch.size(0)
+         total_loss += loss.item() * B
+         total_dna += loss_dna.item() * B
+         total_phy += loss_phy.item() * B
+
+         # Approximate number of active neurons (h > threshold)
+         active_count = (h > 0.01).float().sum(dim=1).mean().item()
+         total_active += active_count * B
+
+         if batch_count % PRINT_EVERY == 0:
+             print(
+                 f"Epoch {epoch+1} | Batch {batch_count} | "
+                 f"Loss={loss.item():.4f} | DNA_CE={loss_dna.item():.4f} | "
+                 f"Phy_MSE={loss_phy.item():.5f} | Active={active_count:.1f}"
+             )
+
+     # Epoch summary
+     N = batch_count * BATCH_SIZE  # actual samples this epoch (drop_last=True)
+     avg_loss = total_loss / N
+     avg_dna = total_dna / N
+     avg_phy = total_phy / N
+     avg_active = total_active / N
+
+     print(f"\n=== Epoch {epoch+1}/{num_epochs} COMPLETE ===")
+     print(
+         f"Avg Loss={avg_loss:.4f} | Avg DNA_CE={avg_dna:.4f} | "
+         f"Avg Phy_MSE={avg_phy:.5f} | "
+         f"Avg Active Neurons={avg_active:.1f} / {LATENT_DIM} "
+         f"({100.0 * avg_active / LATENT_DIM:.1f}%)"
+     )
+
+     # Save checkpoint
+     ckpt_path = f"sparse_ae_50bp_epoch{epoch+1}.pt"
+     torch.save(model.state_dict(), ckpt_path)
+     print(f"Saved checkpoint to {ckpt_path}\n")
save/sparse_ae_50bp_epoch1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:783330441dbcb8954e41b73c14c9f2c6b5b7d4391b2a34f497efbaffea185061
+ size 18845511
save/sparse_ae_50bp_epoch2.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f951d20a5e142004926a72c9fcf5eb930b60fecd6c1016bc937f9590ce1acf4f
+ size 18845511
save/sparse_ae_50bp_epoch3.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8b3568994c3c777b37bea3b781c8d79e8c8883c9fbceee001f6247d7e21def0
+ size 18845511
save/sparse_ae_50bp_epoch4.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1625cf96716fac10180bff026b4a22bb3141c9d7599ffac03963b908e9310d3
+ size 18845511
save/sparse_ae_50bp_epoch5.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9aed9a40b48ddc84c91d5fee7ed1917922a39138c6de011655dc9789d6d94939
+ size 18845511
sparse_ae_50bp_epoch1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0524bbaf5d3890d983e6a5a0950700caa1edb079b4ba8d351f10aedda4e4cbfe
+ size 18845511
sparse_ae_50bp_epoch2.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:49e44d486cf65d22baef24d6d35c480e2b0d1ac54216ee62089be54acf28cee8
+ size 18845511
sparse_ae_50bp_epoch3.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b28ea09c9c217e70bce16a914aacda6d8a864f95e8497eb690e02b7c84e6a036
+ size 18845511
sparse_ae_50bp_epoch4.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b188cb2204f93983c0942d697f8f04736db3bf56012b9324e507e4d83a4673d
+ size 18845511
sparse_ae_50bp_epoch5.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4cc638b7535deeb9c4c10bb9a6c47d746ff09d2aa51cf223b2d262ec468a018a
+ size 18845511
summarize_tokens.py ADDED
@@ -0,0 +1,62 @@
+ import re
+ import glob
+ import numpy as np
+ import pandas as pd
+
+ LOG_FILE = "extract_log.txt"
+
+ # 1. Parse per-token hit counts (N) from the log file
+ token_counts = {}
+ pattern = re.compile(r"Saved token (\d+) \(N=(\d+)\)")
+
+ with open(LOG_FILE, "r") as f:
+     for line in f:
+         m = pattern.search(line)
+         if m:
+             t = int(m.group(1))
+             n = int(m.group(2))
+             token_counts[t] = n
+
+ print(f"Found {len(token_counts)} tokens with counts from log.")
+
+ # 2. For each token, load PWM + phyloP, compute entropy + avg phyloP
+ rows = []
+
+ def pwm_entropy(pwm, eps=1e-8):
+     """
+     pwm: (L, 4) array of mean one-hot probs
+     returns: mean Shannon entropy across positions, in bits
+     """
+     p = pwm / (pwm.sum(axis=1, keepdims=True) + eps)  # normalize for safety
+     H = -np.sum(p * np.log2(p + eps), axis=1)         # (L,)
+     return H.mean()
+
+ for pwm_path in glob.glob("token*_pwm.npy"):
+     # token ID from filename
+     m = re.search(r"token(\d+)_pwm\.npy", pwm_path)
+     if not m:
+         continue
+     t = int(m.group(1))
+
+     pwm = np.load(pwm_path)             # (L, 4)
+     phy = np.load(f"token{t}_phy.npy")  # (L,)
+
+     H = pwm_entropy(pwm)
+     avg_phy = float(phy.mean())
+     N_hits = token_counts.get(t, None)
+
+     rows.append({
+         "token_id": t,
+         "N_hits": N_hits,
+         "pwm_entropy_bits": H,
+         "avg_phyloP": avg_phy
+     })
+
+ df = pd.DataFrame(rows)
+ # Sharpest motifs (low entropy) first; ties broken by conservation (high phyloP)
+ df = df.sort_values(["pwm_entropy_bits", "avg_phyloP"], ascending=[True, False])
+
+ print(df.head(20))
+
+ df.to_csv("token_summary.tsv", sep="\t", index=False)
+ print("\nSaved summary to token_summary.tsv")
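For intuition on pwm_entropy: a uniform position carries log2(4) = 2 bits and a perfectly conserved one 0 bits, so tokens with sharp motifs sort toward the top of the summary. A quick hypothetical check (assumes pwm_entropy from the script above is in scope; no real token files needed):

import numpy as np

uniform = np.full((50, 4), 0.25)                              # no base preference anywhere
conserved = np.tile(np.array([1.0, 0.0, 0.0, 0.0]), (50, 1))  # all-A motif
print(pwm_entropy(uniform))    # ~2.0 bits per position
print(pwm_entropy(conserved))  # ~0.0 bits per position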
token1248_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0ff04bef228fbb4034063112451e0ff3f61ea5010085c896dde823c80995944
+ size 328
token1248_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48ecfc793f3988fb8ac3f44fceeb7e19c78e65c153d9f8f3d8ef84ba95901fce
+ size 928
token1312_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e7f7df3737fb75cf04dba3b9dace53c6288b5df1c8f1504e756795d16ec587e
+ size 328
token1312_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:636a911b43f7f1117d8f1350966a3832b21f4ec02891bd96386cc0e3f297bae9
+ size 928
token138_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94821977d4c17a4f60989babf4c9404e650160ce56ef0f109e3ac078e4292aaa
+ size 328
token138_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00274837ae1e7e223ce799a649edb1ca7e01998780ec0b836e731427e1739ef6
+ size 928
token1417_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c51f148b5df3cf4bce2fde39242187600d0d8fe47d10094b2dccfd4c3715870
+ size 328
token1417_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a294d6621fc0b33bedbf4580ecf3c50f5ae779e15a0c60365015f4a81b9801bd
+ size 928
token1448_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7818a614c3797dff04b89ae64493fc348c32d774a34c068cc6212d097e02cca8
+ size 328
token1448_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:328c466c07fa92f28d23a19accbf8a852f584116a234b9e62c96eec4d1243c0b
+ size 928
token1487_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e511783f52ad148ca2fb1eaef348fd19c844b9d8ceb9c9ffef07efeb48097960
+ size 328
token1487_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7859c6c5085e90a0b2fe26c5f63ce02dbb2f9e56e8e20a77ddf664630a4c6849
+ size 928
token1494_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc33acaa4223bb72be2190dc196840f870c6c5128ce06cb46c86b7aacc8011d1
+ size 328
token1494_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d05893b76c9a7d253d025f8a09f600ec8e6e78914793dd866ed5aae58028330
+ size 928
token1503_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb5354c0f0721175f73d3e5a76f30ca9aa4f76ef20d918ca9faf6b4040c3ce55
+ size 328
token1503_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c4ddd567056af4a22fe33e4783afb65e0037c2fda7778825a7b3efcffbce4c0
+ size 928
token1721_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e1b524b3870dd824a06a9113ecd1e08d1f78e133e39cf6700c6734a5d8e487f
+ size 328
token1721_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5df963ff0dc52052663c9122812da7708299e263f749a2accbb6297b42bfd78
+ size 928
token175_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d20851e9bed217d47d86f6ef752922ba756a17dcb5f1ba826523161827ea4af0
+ size 328
token175_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5ce7c68421a2db5fc5285296599945917c37be115a0eeda6ec9de965be25624
+ size 928
token1831_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68b1ef67ff02631fb4b32165652ef6d97c4edb3b1ca292f03f644098637b821a
+ size 328
token1831_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43dc041a1c64ea71b531c7ab1b1d4ea30bd77b015b6a5d3997dee5ddb00f5fea
+ size 928
token192_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:98cff173bd0ec8c9d3f02d217a5bfa96ee0cca52d03f624dd29939eaee52a8df
+ size 328
token192_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24c99c84a3db007261a0ab245020b7db930dcc53e8e646e16a77a3bdc31bd259
+ size 928
token296_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd142b806afb9d3ecf36bc07eeb4ac05e69f48f64513620059023ad7ea59a5d0
+ size 328
token296_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d9722d77f41184dbbc8089b0c909f00121fb114d33b0b09f5fdacc419aa724d
+ size 928
token363_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66a992eb090aed56f742f8b84e2bc36eee9dba0849b5de454f65c67c8d433c83
+ size 328
token363_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd011451a85cc30d6f4aa5123fab4f6a9a401a74a84ae30859ac1e29262c026c
+ size 928
token468_phy.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:471273cce914188e0b266f884091cd1b85c8f1c554943bd0d5db3a423c3b6cb5
+ size 328
token468_pwm.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84493201ce2efb598ceaf6eb6f86b25168b2a4da76611bee68897a677a732fa1
+ size 928