code5ecure committed on
Commit
c4d2e49
·
verified ·
1 Parent(s): c548cb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -18
app.py CHANGED
@@ -6,10 +6,17 @@ from opacus import PrivacyEngine
6
  from torch.utils.data import Dataset
7
  from torch.optim import AdamW
8
 
 
 
 
 
 
 
 
9
  # Load ParsBERT model and tokenizer
10
  model_name = "HooshvareLab/bert-base-parsbert-uncased"
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = BertLMHeadModel.from_pretrained(model_name, is_decoder=True)
13
 
14
  # Differential Privacy parameters
15
  epsilon = 1.0 # Privacy budget
@@ -29,7 +36,7 @@ class ChatDataset(Dataset):
29
  return len(self.data)
30
 
31
  def __getitem__(self, idx):
32
- item = {key: val[idx] for key, val in self.encodings.items()}
33
  item["labels"] = item["input_ids"].clone()
34
  return item
35
 
@@ -80,23 +87,26 @@ def train_model():
80
  model.train()
81
 
82
  # Add differential privacy
83
- privacy_engine = PrivacyEngine(secure_mode=False) # False for experimentation
84
- private_model, private_optimizer, train_dataloader = privacy_engine.make_private(
85
- module=model,
86
- optimizer=optimizer,
87
- data_loader=trainer.get_train_dataloader(),
88
- noise_multiplier=1.1,
89
- max_grad_norm=1.0,
90
- )
 
91
 
92
- trainer.optimizer = private_optimizer
93
- trainer.train_dataloader = train_dataloader
94
- trainer.model = private_model # Update trainer to use private model
95
 
96
- trainer.train()
97
- model.save_pretrained("./fine_tuned_model")
98
- tokenizer.save_pretrained("./fine_tuned_model")
99
- print("Model training completed and saved to ./fine_tuned_model")
 
 
100
 
101
  def add_noise(tensor, sensitivity, epsilon, delta):
102
  """Add Laplace noise for differential privacy."""
@@ -117,7 +127,7 @@ def chat(message, history):
117
  model.eval()
118
 
119
  # Tokenize input
120
- inputs = tokenizer(message, return_tensors="pt", padding=True, truncation=True, max_length=128).to(model.device)
121
 
122
  # Generate response with model using beam search
123
  with torch.no_grad():
 
6
  from torch.utils.data import Dataset
7
  from torch.optim import AdamW
8
 
9
+ # Disable torch.compile to avoid meta device issues
10
+ torch._dynamo.config.suppress_errors = True
11
+ torch.set_default_dtype(torch.float32)
12
+
13
+ # Set device explicitly
14
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
+
16
  # Load ParsBERT model and tokenizer
17
  model_name = "HooshvareLab/bert-base-parsbert-uncased"
18
  tokenizer = AutoTokenizer.from_pretrained(model_name)
19
+ model = BertLMHeadModel.from_pretrained(model_name, is_decoder=True).to(device)
20
 
21
  # Differential Privacy parameters
22
  epsilon = 1.0 # Privacy budget
 
36
  return len(self.data)
37
 
38
  def __getitem__(self, idx):
39
+ item = {key: val[idx].to(device) for key, val in self.encodings.items()}
40
  item["labels"] = item["input_ids"].clone()
41
  return item
42
 
 
87
  model.train()
88
 
89
  # Add differential privacy
90
+ try:
91
+ privacy_engine = PrivacyEngine(secure_mode=False) # False for experimentation
92
+ private_model, private_optimizer, train_dataloader = privacy_engine.make_private(
93
+ module=model,
94
+ optimizer=optimizer,
95
+ data_loader=trainer.get_train_dataloader(),
96
+ noise_multiplier=1.1,
97
+ max_grad_norm=1.0,
98
+ )
99
 
100
+ trainer.optimizer = private_optimizer
101
+ trainer.train_dataloader = train_dataloader
102
+ trainer.model = private_model # Update trainer to use private model
103
 
104
+ trainer.train()
105
+ model.save_pretrained("./fine_tuned_model")
106
+ tokenizer.save_pretrained("./fine_tuned_model")
107
+ print("Model training completed and saved to ./fine_tuned_model")
108
+ except Exception as e:
109
+ print(f"Error during training with differential privacy: {e}")
110
 
111
  def add_noise(tensor, sensitivity, epsilon, delta):
112
  """Add Laplace noise for differential privacy."""
 
127
  model.eval()
128
 
129
  # Tokenize input
130
+ inputs = tokenizer(message, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
131
 
132
  # Generate response with model using beam search
133
  with torch.no_grad():