Update model.py

model.py CHANGED
@@ -6,12 +6,13 @@ import torch.nn.functional as F
 from typing import List, Dict, Any, Optional
 from collections import defaultdict
 from accelerate import Accelerator
+from torch.utils.data import DataLoader, TensorDataset
+from torch.cuda.amp import GradScaler, autocast
 
 class DynamicModel(nn.Module):
     def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
         super(DynamicModel, self).__init__()
         self.sections = nn.ModuleDict()
-
         if not sections:
             sections = {
                 'default': [{
@@ -22,7 +23,6 @@ class DynamicModel(nn.Module):
                     'dropout': 0.1
                 }]
             }
-
         for section_name, layers in sections.items():
             self.sections[section_name] = nn.ModuleList()
             for layer_params in layers:
@@ -32,10 +32,8 @@ class DynamicModel(nn.Module):
     def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
         layers = []
         layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
-
         if layer_params.get('batch_norm', False):
             layers.append(nn.BatchNorm1d(layer_params['output_size']))
-
         activation = layer_params.get('activation', 'relu')
         if activation == 'relu':
             layers.append(nn.ReLU(inplace=True))
@@ -49,23 +47,17 @@ class DynamicModel(nn.Module):
             layers.append(nn.ELU(alpha=1.0, inplace=True))
         elif activation is not None:
             raise ValueError(f"Unsupported activation function: {activation}")
-
         if dropout_rate := layer_params.get('dropout', 0.0):
             layers.append(nn.Dropout(p=dropout_rate))
-
         if hidden_layers := layer_params.get('hidden_layers', []):
             for hidden_layer_params in hidden_layers:
                 layers.append(self.create_layer(hidden_layer_params))
-
         if layer_params.get('memory_augmentation', True):
             layers.append(MemoryAugmentationLayer(layer_params['output_size']))
-
         if layer_params.get('hybrid_attention', True):
             layers.append(HybridAttentionLayer(layer_params['output_size']))
-
         if layer_params.get('dynamic_flash_attention', True):
             layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
-
         return nn.Sequential(*layers)
 
     def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
@@ -111,38 +103,30 @@ class DynamicFlashAttentionLayer(nn.Module):
 def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
     tree = ET.parse(file_path)
     root = tree.getroot()
-
     layers = []
     for layer in root.findall('.//layer'):
         layer_params = {}
         layer_params['input_size'] = int(layer.get('input_size', 128))
         layer_params['output_size'] = int(layer.get('output_size', 256))
         layer_params['activation'] = layer.get('activation', 'relu').lower()
-
         if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
             raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
-
         if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
             raise ValueError("Layer dimensions must be positive integers")
-
         layers.append(layer_params)
-
     if not layers:
         layers.append({
             'input_size': 128,
             'output_size': 256,
             'activation': 'relu'
         })
-
     return layers
 
 def create_model_from_folder(folder_path: str) -> DynamicModel:
     sections = defaultdict(list)
-
     if not os.path.exists(folder_path):
         print(f"Warning: Folder {folder_path} does not exist. Creating model with default configuration.")
         return DynamicModel({})
-
     xml_files_found = False
     for root, dirs, files in os.walk(folder_path):
         for file in files:
@@ -155,64 +139,59 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
                     sections[section_name].extend(layers)
                 except Exception as e:
                     print(f"Error processing {file_path}: {str(e)}")
-
     if not xml_files_found:
         print("Warning: No XML files found. Creating model with default configuration.")
         return DynamicModel({})
-
     return DynamicModel(dict(sections))
 
 def main():
     folder_path = 'data'
     model = create_model_from_folder(folder_path)
-
     print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
-
     # Print the model architecture
     print(model)
-
     first_section = next(iter(model.sections.keys()))
     first_layer = model.sections[first_section][0]
     input_features = first_layer[0].in_features
-
     # Ensure the input tensor size matches the expected input size
     sample_input = torch.randn(1, input_features)
     output = model(sample_input)
     print(f"Sample output shape: {output.shape}")
 
     accelerator = Accelerator()
-
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     criterion = nn.CrossEntropyLoss()
     num_epochs = 10
-
-    dataset = torch.utils.data.TensorDataset(
+    dataset = TensorDataset(
         torch.randn(100, input_features),
         torch.randint(0, 2, (100,))
     )
-    train_dataloader =
-        dataset,
-        batch_size=
+    train_dataloader = DataLoader(
+        dataset,
+        batch_size=8,  # Reduced batch size
        shuffle=True
     )
 
     model, optimizer, train_dataloader = accelerator.prepare(
-        model,
-        optimizer,
+        model,
+        optimizer,
         train_dataloader
     )
 
+    scaler = GradScaler()  # Mixed precision training
+
     for epoch in range(num_epochs):
         model.train()
         total_loss = 0
         for batch_idx, (inputs, labels) in enumerate(train_dataloader):
             optimizer.zero_grad()
-
-
-
-
+            with autocast():  # Mixed precision training
+                outputs = model(inputs)
+                loss = criterion(outputs, labels)
+            scaler.scale(loss).backward()
+            scaler.step(optimizer)
+            scaler.update()
             total_loss += loss.item()
-
         avg_loss = total_loss / len(train_dataloader)
         print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
 
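For reference, a minimal usage sketch (not part of this commit) of the configuration format that parse_xml_file reads above: it looks for <layer> elements carrying input_size, output_size, and activation attributes. The module name `model` and the file name data/default.xml are assumptions; how a section name is derived from each XML file is not visible in this diff.

# Hypothetical driver: writes one XML layer config and builds a model from it.
# Assumes model.py is importable as `model`; the data/default.xml name is a guess.
import os
import torch
from model import create_model_from_folder

os.makedirs('data', exist_ok=True)
with open('data/default.xml', 'w') as f:
    f.write('<config>\n'
            '  <layer input_size="128" output_size="256" activation="relu"/>\n'
            '</config>\n')

net = create_model_from_folder('data')
print(list(net.sections.keys()))        # sections discovered from the XML files
print(net(torch.randn(4, 128)).shape)   # input width matches input_size above

Attributes omitted from a <layer> element fall back to the defaults visible in parse_xml_file (128, 256, 'relu').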
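The updated loop drives torch.cuda.amp (GradScaler/autocast) by hand while also routing the model, optimizer, and dataloader through accelerator.prepare. As a point of comparison only, a sketch of the same loop with mixed precision delegated to Accelerate, assuming the model, optimizer, criterion, train_dataloader, and num_epochs created in main() above:

# Alternative sketch (assumption, not what the commit does): let Accelerate
# manage mixed precision instead of an explicit GradScaler.
accelerator = Accelerator(mixed_precision='fp16')  # or 'bf16' on supported hardware
model, optimizer, train_dataloader = accelerator.prepare(model, optimizer, train_dataloader)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for inputs, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        accelerator.backward(loss)  # loss scaling handled internally when fp16 is enabled
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {total_loss / len(train_dataloader):.4f}")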