Spaces:

Shree2604
/

BioStack

Sleeping

App Files Files Community

AE-Shree committed on Feb 24

Commit

58b68f2

1 Parent(s): 1537418

Deploy BioStack RLHF Medical Demo

Browse files

Files changed (1) hide show

server.py +43 -151

server.py CHANGED Viewed

@@ -12,6 +12,17 @@ from torchvision import transforms
 from transformers import T5ForConditionalGeneration, T5Tokenizer
 from huggingface_hub import hf_hub_download
 # ─────────────────────────────────────────────────────────────────────────────
 # DEVICE
 # ─────────────────────────────────────────────────────────────────────────────
@@ -19,17 +30,44 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"🖥️  Using device: {device}")
 # ─────────────────────────────────────────────────────────────────────────────
-# SHARED TOKENIZER
 # ─────────────────────────────────────────────────────────────────────────────
-tokenizer = T5Tokenizer.from_pretrained("t5-small", legacy=False)
 # ─────────────────────────────────────────────────────────────────────────────
 # ARCHITECTURE 1 — CoAtNet Encoder  (shared by all three models)
 # Matches BOTH notebooks exactly.
 # ─────────────────────────────────────────────────────────────────────────────
 class CoAtNetEncoder(nn.Module):
-    def __init__(self, model_name="coatnet_1_rw_224", pretrained=False, train_last_stages=2):
         super().__init__()
         # pretrained=False at inference time — weights come from .pt file
         self.backbone = timm.create_model(model_name, pretrained=pretrained)
@@ -337,38 +375,6 @@ def preprocess(file_bytes: bytes) -> torch.Tensor:
     return transform(img).unsqueeze(0).to(device)   # [1, 3, 224, 224]
-# ─────────────────────────────────────────────────────────────────────────────
-# DEBUGGING TOOLS - Compare with Jupyter notebook results
-# ─────────────────────────────────────────────────────────────────────────────
-import hashlib
-def get_model_hash(model):
-    """Return an MD5 hex digest fingerprinting the model's state dict.
-
-    The digest covers each entry's name, dtype, shape and raw bytes.
-    Hashing ``str(model.state_dict())`` is unreliable: the printed form of
-    a large tensor elides most elements and rounds the remainder, so two
-    models with different weights can yield the same digest.
-
-    Args:
-        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
-
-    Returns:
-        Hexadecimal MD5 digest string (a fingerprint, not a security hash).
-    """
-    digest = hashlib.md5()
-    for name, value in model.state_dict().items():
-        digest.update(name.encode())
-        if torch.is_tensor(value):
-            flat = value.detach().cpu().contiguous().reshape(-1)
-            digest.update(f"{flat.dtype}{tuple(value.shape)}".encode())
-            # Reinterpret the storage as bytes so any dtype can be hashed.
-            digest.update(flat.view(torch.uint8).numpy().tobytes())
-        else:
-            # Non-tensor entries fall back to their repr.
-            digest.update(repr(value).encode())
-    return digest.hexdigest()
-def log_inference_details(model_name, image_tensor, generated_ids, decoded_report):
-    """Print a framed debug report for a single inference call.
-
-    Args:
-        model_name: Label printed in the banner; its lowercase form plus
-            ``_model`` is the module-global name the model is read from.
-        image_tensor: Input tensor; its shape, mean and std are printed.
-        generated_ids: Generated token ids; printed with their shape.
-        decoded_report: Decoded text; printed with its character count.
-
-    Raises:
-        KeyError: If no matching ``<name>_model`` global exists.
-    """
-    banner = '=' * 50
-    print(f"\n{banner}")
-    print(f" {model_name} INFERENCE DEBUG")
-    print(f"{banner}")
-    # Resolve the model once by naming convention, after the header is printed.
-    model = globals()[f'{model_name.lower()}_model']
-    print(f"Model hash: {get_model_hash(model)}")
-    print(f"Image tensor shape: {image_tensor.shape}")
-    print(f"Image tensor mean: {image_tensor.mean():.6f}")
-    print(f"Image tensor std: {image_tensor.std():.6f}")
-    print(f"Model in eval mode: {not model.training}")
-    print(f"Generated IDs: {generated_ids}")
-    print(f"Generated IDs shape: {generated_ids.shape}")
-    print(f"Decoded report: '{decoded_report}'")
-    print(f"Report length: {len(decoded_report)} chars")
-    print(f"{banner}\n")
-# Set consistent random seeds for reproducible results
-torch.manual_seed(42)
-torch.cuda.manual_seed_all(42)
-print(" Random seeds set to 42 for reproducible results")
 # ─────────────────────────────────────────────────────────────────────────────
 # REWARD FEEDBACK GENERATOR
 # ─────────────────────────────────────────────────────────────────────────────
@@ -422,72 +428,8 @@ def health():
 async def sft_inference(file: UploadFile = File(...)):
     try:
         tensor = preprocess(await file.read())
-        # Enhanced debugging - capture generation details
-        print(f"\n🔍 [SFT] DETAILED INFERENCE ANALYSIS")
-        print(f"{'='*60}")
-        print(f"Model checkpoint: {SFT_MODEL_PATH}")
-        print(f"Image tensor shape: {tensor.shape}")
-        print(f"Image tensor device: {tensor.device}")
-        print(f"Image tensor mean: {tensor.mean():.6f}")
-        print(f"Image tensor std: {tensor.std():.6f}")
-        print(f"Model in eval mode: {not sft_model.training}")
-        print(f"Using torch.no_grad: True")
-        # Get raw generation output before decoding
-        with torch.no_grad():
-            img_features = sft_model.img_encoder(tensor)
-            img_emb = sft_model.img_proj(img_features).unsqueeze(1)
-            batch_size = tensor.size(0)
-            img_attn = torch.ones(batch_size, 1, device=tensor.device)
-            encoder_outputs = sft_model.txt_model.encoder(
-                inputs_embeds=img_emb,
-                attention_mask=img_attn
-            )
-            # Log generation parameters
-            print(f"Generation parameters:")
-            print(f"  - max_length: 128")
-            print(f"  - num_beams: 4")
-            print(f"  - early_stopping: True")
-            print(f"  - no_repeat_ngram_size: 3")
-            print(f"  - repetition_penalty: 1.3")
-            print(f"  - do_sample: False")
-            print(f"  - temperature: N/A (deterministic)")
-            generated = sft_model.txt_model.generate(
-                encoder_outputs=encoder_outputs,
-                attention_mask=img_attn,
-                max_length=128,
-                num_beams=4,
-                early_stopping=True,
-                no_repeat_ngram_size=3,
-                repetition_penalty=1.3,
-            )
-            print(f"Raw generated IDs: {generated}")
-            print(f"Generated IDs shape: {generated.shape}")
-            # Decode with same parameters as notebook
-            reports = tokenizer.batch_decode(generated, skip_special_tokens=True)
-            # Apply same post-processing
-            cleaned_reports = []
-            for r in reports:
-                if r.lower().startswith("projection:"):
-                    parts = r.split(".", 1)
-                    r = parts[1].strip() if len(parts) > 1 else r
-                cleaned_reports.append(r)
-            report = cleaned_reports[0]
-            print(f"Decoded report: '{report}'")
-            print(f"Report length: {len(report)} chars")
-            print(f"Model hash: {get_model_hash(sft_model)}")
-            print(f"{'='*60}\n")
-        print(f"[SFT] Final Generated: {report}")
         return {"report": report[:81]}
     except Exception as e:
         traceback.print_exc()
@@ -570,56 +512,6 @@ async def ppo_inference(file: UploadFile = File(...)):
 # DIAGNOSTIC ENDPOINT — call GET /debug_keys to verify key names in your files
 # e.g.  curl http://localhost:8000/debug_keys
 # ─────────────────────────────────────────────────────────────────────────────
-@app.get("/debug_compare")
-def debug_compare():
-    """
-    Special endpoint to debug inference differences.
-
-    Returns a dictionary with five sections: ``server_info`` (device,
-    library versions, seed, checkpoint file names), ``model_hashes``,
-    ``generation_params``, ``preprocessing`` and ``model_states`` (whether
-    each model is in eval mode).
-
-    NOTE(review): the seed, generation parameters and preprocessing values
-    are literals written here, not read from the code that actually runs
-    inference — keep them in sync by hand or confirm they still match.
-    """
-    import os
-    # Imported locally so ``transformers.__version__`` resolves even when the
-    # module only has ``from transformers import ...`` at file level.
-    import transformers
-    comparison_data = {
-        "server_info": {
-            "device": str(device),
-            "torch_version": torch.__version__,
-            "transformers_version": transformers.__version__,
-            "random_seed": 42,
-            "models_loaded": {
-                "SFT": os.path.basename(SFT_MODEL_PATH),
-                "Reward": os.path.basename(REWARD_MODEL_PATH),
-                "PPO": os.path.basename(PPO_MODEL_PATH)
-            }
-        },
-        "model_hashes": {
-            "SFT": get_model_hash(sft_model),
-            "Reward": get_model_hash(reward_model),
-            "PPO": get_model_hash(ppo_model)
-        },
-        "generation_params": {
-            "max_length": 128,
-            "num_beams": 4,
-            "early_stopping": True,
-            "no_repeat_ngram_size": 3,
-            "repetition_penalty": 1.3,
-            "do_sample": False,
-            "temperature": "N/A (deterministic)"
-        },
-        "preprocessing": {
-            "resize": [224, 224],
-            "normalize_mean": [0.485, 0.456, 0.406],
-            "normalize_std": [0.229, 0.224, 0.225],
-            "convert": "RGB"
-        },
-        "model_states": {
-            # ``training`` is False once a module has been put in eval mode.
-            "SFT_eval": not sft_model.training,
-            "Reward_eval": not reward_model.training,
-            "PPO_eval": not ppo_model.training
-        }
-    }
-    return comparison_data
 @app.get("/debug_keys")
 def debug_keys():
     import os

 from transformers import T5ForConditionalGeneration, T5Tokenizer
 from huggingface_hub import hf_hub_download
+# ─────────────────────────────────────────────────────────────────────────────
+# CONFIGURATION
+# ─────────────────────────────────────────────────────────────────────────────
+# Central settings mapping; other parts of this file read entries by key
+# (e.g. the tokenizer name, the image size and the encoder defaults).
+CONFIG = dict(
+    coatnet_model='coatnet_1_rw_224',
+    t5_model='t5-small',
+    img_emb_dim=768,
+    train_last_stages=2,
+    image_size=224,
+)
 # ─────────────────────────────────────────────────────────────────────────────
 # DEVICE
 # ─────────────────────────────────────────────────────────────────────────────
 print(f"🖥️  Using device: {device}")
 # ─────────────────────────────────────────────────────────────────────────────
+# SECTION 7: Load Tokenizer and Image Transform
 # ─────────────────────────────────────────────────────────────────────────────
+# Module-level setup: runs once at import time, prints a banner, loads the
+# shared tokenizer and builds the image transform used for preprocessing.
+print("\n" + "="*80)
+print("LOADING TOKENIZER AND IMAGE TRANSFORM")
+print("="*80)
+# Load tokenizer named by CONFIG['t5_model'].
+# NOTE(review): the previous version of this call passed legacy=False;
+# confirm that dropping it is intentional and matches how the models were trained.
+tokenizer = T5Tokenizer.from_pretrained(CONFIG['t5_model'])
+print(f"✓ Loaded tokenizer: {CONFIG['t5_model']}")
+# Define image transform: resize to a square of CONFIG['image_size'],
+# convert to a tensor, then normalize each of the three channels.
+transform = transforms.Compose([
+    transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
+    transforms.ToTensor(),
+    # Presumably the standard ImageNet statistics — confirm they match training.
+    transforms.Normalize(
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]
+    )
+])
+print(f"✓ Image transform defined (size: {CONFIG['image_size']}x{CONFIG['image_size']})")
+def preprocess_image(image_path: str) -> torch.Tensor:
+    """Load an image file and apply the module-level ``transform``.
+
+    Args:
+        image_path: Path of the image file to open.
+
+    Returns:
+        The tensor produced by ``transform`` for the RGB-converted image.
+        No batch dimension is added and the tensor is not moved to
+        ``device`` here.
+    """
+    # Use a context manager so the file handle is released deterministically
+    # rather than whenever the image object happens to be garbage-collected.
+    with Image.open(image_path) as image:
+        return transform(image.convert('RGB'))
 # ─────────────────────────────────────────────────────────────────────────────
 # ARCHITECTURE 1 — CoAtNet Encoder  (shared by all three models)
 # Matches BOTH notebooks exactly.
 # ─────────────────────────────────────────────────────────────────────────────
 class CoAtNetEncoder(nn.Module):
+    def __init__(self, model_name=None, pretrained=False, train_last_stages=None):
         super().__init__()
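+        # Use CONFIG defaults if not specified. NOTE: `or` also replaces falsy arguments (e.g. train_last_stages=0 or model_name="") with the CONFIG value.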
+        model_name = model_name or CONFIG['coatnet_model']
+        train_last_stages = train_last_stages or CONFIG['train_last_stages']
         # pretrained=False at inference time — weights come from .pt file
         self.backbone = timm.create_model(model_name, pretrained=pretrained)
     return transform(img).unsqueeze(0).to(device)   # [1, 3, 224, 224]
 # ─────────────────────────────────────────────────────────────────────────────
 # REWARD FEEDBACK GENERATOR
 # ─────────────────────────────────────────────────────────────────────────────
 async def sft_inference(file: UploadFile = File(...)):
     try:
         tensor = preprocess(await file.read())
+        report = sft_model.generate_reports(tensor)[0]
+        print(f"[SFT] Generated: {report}")
         return {"report": report[:81]}
     except Exception as e:
         traceback.print_exc()
 # DIAGNOSTIC ENDPOINT — call GET /debug_keys to verify key names in your files
 # e.g.  curl http://localhost:8000/debug_keys
 # ─────────────────────────────────────────────────────────────────────────────
 @app.get("/debug_keys")
 def debug_keys():
     import os