gary-boon (Claude) committed
Commit 7dd568f · Parent: ed40a9a

Fix model info endpoint for Code Llama compatibility


Fixed AttributeError when accessing model config attributes:
- CodeGen uses: n_layer, n_head, n_embd, n_positions
- Llama/Code Llama uses: num_hidden_layers, num_attention_heads, hidden_size, max_position_embeddings

Changes:
- Use getattr() with fallbacks to handle both config schemas
- Return dynamic model name from manager.model_name instead of hardcoded value
- Handle different activation function and layer norm attribute names

This fixes the 500 Internal Server Error on the GPU backend when loading Code Llama 7B.
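A minimal standalone sketch of the same getattr() fallback pattern, outside the service code. It only assumes the stock Transformers config classes (CodeGenConfig, LlamaConfig); the helper name normalized_dims is hypothetical and used here purely for illustration:

from transformers import CodeGenConfig, LlamaConfig

def normalized_dims(config):
    """Resolve layer/head/hidden/position attributes for either config schema."""
    return {
        "layers": getattr(config, "num_hidden_layers", getattr(config, "n_layer", 0)),
        "heads": getattr(config, "num_attention_heads", getattr(config, "n_head", 0)),
        "hiddenSize": getattr(config, "hidden_size", getattr(config, "n_embd", 0)),
        "maxPositions": getattr(config, "max_position_embeddings", getattr(config, "n_positions", 0)),
    }

# CodeGen-style configs expose n_layer/n_head/n_embd/n_positions;
# Llama-style configs expose num_hidden_layers/num_attention_heads/hidden_size/max_position_embeddings.
print(normalized_dims(CodeGenConfig()))
print(normalized_dims(LlamaConfig()))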

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1)
  1. backend/model_service.py (+20, -12)

backend/model_service.py CHANGED
@@ -855,30 +855,38 @@ async def model_info(authenticated: bool = Depends(verify_api_key)):
     """Get detailed information about the loaded model"""
     if not manager.model:
         raise HTTPException(status_code=503, detail="Model not loaded")
 
     config = manager.model.config
 
     # Calculate total parameters
     total_params = sum(p.numel() for p in manager.model.parameters())
     trainable_params = sum(p.numel() for p in manager.model.parameters() if p.requires_grad)
 
+    # Handle different config attribute names across model architectures
+    # CodeGen uses: n_layer, n_head, n_embd, n_positions
+    # Llama/Code Llama uses: num_hidden_layers, num_attention_heads, hidden_size, max_position_embeddings
+    num_layers = getattr(config, 'num_hidden_layers', getattr(config, 'n_layer', 0))
+    num_heads = getattr(config, 'num_attention_heads', getattr(config, 'n_head', 0))
+    hidden_size = getattr(config, 'hidden_size', getattr(config, 'n_embd', 0))
+    max_positions = getattr(config, 'max_position_embeddings', getattr(config, 'n_positions', 0))
+
     return {
-        "name": "Salesforce/codegen-350M-mono",
+        "name": manager.model_name,
         "type": config.model_type,
         "totalParams": total_params,
         "trainableParams": trainable_params,
-        "layers": config.n_layer,
-        "heads": config.n_head,
-        "hiddenSize": config.n_embd,
+        "layers": num_layers,
+        "heads": num_heads,
+        "hiddenSize": hidden_size,
         "vocabSize": config.vocab_size,
-        "maxPositions": config.n_positions,
+        "maxPositions": max_positions,
         "architecture": manager.model.__class__.__name__,
         "device": str(manager.device),
         "dtype": str(next(manager.model.parameters()).dtype),
         "accessible": [
             f"Token probabilities (all {config.vocab_size})",
-            f"Attention weights ({config.n_layer} layers × {config.n_head} heads = {config.n_layer * config.n_head} patterns)",
-            f"Hidden states (all {config.n_layer} layers)",
+            f"Attention weights ({num_layers} layers × {num_heads} heads = {num_layers * num_heads} patterns)",
+            f"Hidden states (all {num_layers} layers)",
             "Logits before softmax",
             "Token embeddings",
             "Position embeddings (RoPE)",
@@ -888,8 +896,8 @@ async def model_info(authenticated: bool = Depends(verify_api_key)):
             "Activation functions (GELU)"
         ],
         "config": {
-            "activation_function": config.activation_function,
-            "layer_norm_epsilon": config.layer_norm_epsilon,
+            "activation_function": getattr(config, 'activation_function', getattr(config, 'hidden_act', 'unknown')),
+            "layer_norm_epsilon": getattr(config, 'layer_norm_epsilon', getattr(config, 'rms_norm_eps', 1e-5)),
             "tie_word_embeddings": config.tie_word_embeddings,
             "rotary_dim": config.rotary_dim if hasattr(config, 'rotary_dim') else None,
             "use_cache": config.use_cache