Fix: Extend max_length for proper text reconstruction
app.py CHANGED
@@ -105,13 +105,16 @@ class B2NLTokenizer:
 
         # Reconstruct (full text, not truncated)
         with torch.no_grad():
+            # Calculate appropriate max_length based on input
+            max_gen_length = max(48, min(len(text) + 10, 512))  # Allow some extra space
 
-            reconstructed = self.model.generate(text, temperature=temperature)
+            reconstructed = self.model.generate(text, temperature=temperature, max_length=max_gen_length)
+
+            # For long texts, ensure we get full reconstruction
             if text_bytes > 48:
+                # Current model limitation: may not fully reconstruct very long texts
+                # This is due to sliding window processing
                 full_reconstruction = reconstructed
-                # Note: Current implementation may truncate, this is a known limitation
             else:
                 full_reconstruction = reconstructed
 
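The max_gen_length clamp bounds generation on both ends: never below the model's 48-byte window, never above 512, and otherwise the input length plus a little headroom. A minimal sketch of the boundary behaviour (the helper name pick_max_length is illustrative; only the 48/512 bounds and the +10 headroom come from the diff):

    def pick_max_length(text: str) -> int:
        # Same clamp as in the diff: floor of 48, ceiling of 512,
        # otherwise input length plus 10 characters of headroom.
        return max(48, min(len(text) + 10, 512))

    assert pick_max_length("") == 48            # short inputs stay at the 48 floor
    assert pick_max_length("a" * 100) == 110    # mid-range inputs get +10 headroom
    assert pick_max_length("a" * 600) == 512    # long inputs hit the 512 ceiling

One caveat: len(text) counts characters, not bytes, so multi-byte UTF-8 input (the case the text_bytes > 48 branch cares about) gets less headroom than its byte length suggests; len(text.encode("utf-8")) would track the byte budget more closely.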