ggunio committed on
Commit
85b3a58
·
verified ·
1 Parent(s): 2068c6b

Fix: Extend max_length for proper text reconstruction

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -105,13 +105,16 @@ class B2NLTokenizer:
105
 
106
  # Reconstruct (full text, not truncated)
107
  with torch.no_grad():
108
- reconstructed = self.model.generate(text, temperature=temperature, max_length=48)
 
109
 
110
- # For long texts, process multiple chunks
 
 
111
  if text_bytes > 48:
112
- # Process with sliding window
 
113
  full_reconstruction = reconstructed
114
- # Note: Current implementation may truncate, this is a known limitation
115
  else:
116
  full_reconstruction = reconstructed
117
 
 
105
 
106
  # Reconstruct (full text, not truncated)
107
  with torch.no_grad():
108
+ # Calculate appropriate max_length based on input
109
+ max_gen_length = max(48, min(len(text) + 10, 512)) # Allow some extra space
110
 
111
+ reconstructed = self.model.generate(text, temperature=temperature, max_length=max_gen_length)
112
+
113
+ # For long texts, ensure we get full reconstruction
114
  if text_bytes > 48:
115
+ # Current model limitation: may not fully reconstruct very long texts
116
+ # This is due to sliding window processing
117
  full_reconstruction = reconstructed
 
118
  else:
119
  full_reconstruction = reconstructed
120