File size: 15,350 Bytes
d0dd276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
import time
from fastapi import APIRouter, Depends, Request, HTTPException
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from typing import List, Dict, Any, Optional
from app.vertex.auth import get_api_key, validate_api_key
from app.vertex.model_loader import get_vertex_models, get_vertex_express_models, refresh_models_config_cache
import app.vertex.config as app_config
from app.vertex.credentials_manager import CredentialManager
from app.utils.logging import vertex_log
from app.config import settings

router = APIRouter(prefix="/models")

# Setup security
security = HTTPBearer()

@router.get("/list")
async def list_models(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """
    List available models for Vertex generation.
    
    Returns a list of models in OpenAI-compatible format.
    """
    # Validate API key
    api_key = validate_api_key(credentials)
    if not api_key:
        raise HTTPException(status_code=401, detail="Invalid API key")
    
    # Get available models
    vertex_log('info', "Retrieving list of available models")
    standard_models = await get_vertex_models()
    express_models = await get_vertex_express_models()
    
    # Combine and format
    all_models = []
    
    # Format standard models
    for model_name in standard_models:
        all_models.append({
            "id": model_name,
            "object": "model",
            "created": 1677610602,  # Placeholder timestamp
            "owned_by": "google",
            "permission": [],
            "root": model_name,
            "parent": None,
            "pricing": {
                "prompt": "0.0020",  # Placeholder pricing
                "completion": "0.0020"  # Placeholder pricing
            }
        })
    
    # Format express models
    for model_name in express_models:
        all_models.append({
            "id": model_name,
            "object": "model",
            "created": 1677610602,  # Placeholder timestamp
            "owned_by": "google-express",
            "permission": [],
            "root": model_name,
            "parent": None,
            "pricing": {
                "prompt": "0.0010",  # Lower placeholder pricing for express
                "completion": "0.0010"  # Lower placeholder pricing for express
            }
        })
    
    # Add encryption variants if available
    for base_model in standard_models:
        encrypt_model_name = f"{base_model}-encrypt-full"
        all_models.append({
            "id": encrypt_model_name,
            "object": "model",
            "created": 1677610602,  # Placeholder timestamp
            "owned_by": "google-secure",
            "permission": [],
            "root": base_model,
            "parent": base_model,
            "pricing": {
                "prompt": "0.0025",  # Higher placeholder pricing for encrypt
                "completion": "0.0025"  # Higher placeholder pricing for encrypt
            }
        })
    
    vertex_log('info', f"Found {len(all_models)} available models")
    
    return {
        "object": "list",
        "data": all_models
    }

@router.post("/refresh")
async def refresh_models(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """
    Refresh the models configuration cache.
    
    Forces a refresh of the models available from the configuration source.
    """
    # Validate API key
    api_key = validate_api_key(credentials)
    if not api_key:
        raise HTTPException(status_code=401, detail="Invalid API key")
    
    vertex_log('info', "Attempting to refresh models configuration")
    
    # Attempt refresh
    success = await refresh_models_config_cache()
    
    if success:
        # Get updated counts
        standard_models = await get_vertex_models()
        express_models = await get_vertex_express_models()
        
        vertex_log('info', f"Models refresh successful. Standard: {len(standard_models)}, Express: {len(express_models)}")
        
        return {
            "success": True,
            "message": "Models configuration refreshed successfully",
            "models_count": {
                "standard": len(standard_models),
                "express": len(express_models),
                "total": len(standard_models) + len(express_models)
            }
        }
    else:
        vertex_log('error', "Failed to refresh models configuration")
        
        raise HTTPException(
            status_code=500,
            detail="Failed to refresh models configuration"
        )

@router.get("/v1/models")
async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_key)):
    await refresh_models_config_cache()
    
    OPENAI_DIRECT_SUFFIX = "-openai"
    EXPERIMENTAL_MARKER = "-exp-"
    PAY_PREFIX = "[PAY]"
    
    # 获取credential_manager,如果不存在则创建一个新的
    try:
        credential_manager_instance = fastapi_request.app.state.credential_manager
        vertex_log('info', "Using existing credential manager from app state")
    except AttributeError:
        # 如果app.state中没有credential_manager,则创建一个新的
        vertex_log('warning', "No credential_manager found in app.state, creating a new one")
        credential_manager_instance = CredentialManager()

    # 检查是否有SA凭证和Express Key
    has_sa_creds = credential_manager_instance.get_total_credentials() > 0
    
    # 优先从settings中获取EXPRESS API KEY
    has_express_key = False
    if hasattr(settings, 'VERTEX_EXPRESS_API_KEY') and settings.VERTEX_EXPRESS_API_KEY:
        vertex_log('info', "使用settings中的VERTEX_EXPRESS_API_KEY")
        has_express_key = True
    elif app_config.VERTEX_EXPRESS_API_KEY_VAL:
        vertex_log('info', "使用app_config中的VERTEX_EXPRESS_API_KEY_VAL")
        has_express_key = True

    raw_vertex_models = await get_vertex_models()
    raw_express_models = await get_vertex_express_models()
    
    candidate_model_ids = set()

    if has_express_key:
        candidate_model_ids.update(raw_express_models)
        # If *only* express key is available, only express models (and their variants) should be listed.
        # The current `vertex_model_ids` from remote config might contain non-express models.
        # The `get_vertex_express_models()` should be the source of truth for express-eligible base models.
        if not has_sa_creds:
            # Only list models that are explicitly in the express list.
            # Suffix generation will apply only to these if they are not gemini-2.0
            all_model_ids = set(raw_express_models)
        else:
            # Both SA and Express are available, combine all known models
            all_model_ids = set(raw_vertex_models + raw_express_models)
    elif has_sa_creds:
        # Only SA creds available, use all vertex_models (which might include express-eligible ones)
        all_model_ids = set(raw_vertex_models)
    else:
        # No credentials available
        all_model_ids = set()
    
    # Create extended model list with variations (search, encrypt, auto etc.)
    # This logic might need to be more sophisticated based on actual supported features per base model.
    # For now, let's assume for each base model, we might have these variations.
    # A better approach would be if the remote config specified these variations.
    
    dynamic_models_data: List[Dict[str, Any]] = []
    current_time = int(time.time())

    # Add base models and their variations
    for original_model_id in sorted(list(all_model_ids)):
        current_display_prefix = ""
        # 检查是否为[EXPRESS]模型,保留原始前缀
        is_express_model = original_model_id.startswith("[EXPRESS]")
        base_model_without_prefix = original_model_id
        
        # 如果是EXPRESS模型,移除前缀用于后续处理,但在显示时会保留
        if is_express_model:
            # 从原始ID中提取不带[EXPRESS]前缀的模型名
            base_model_without_prefix = original_model_id[len("[EXPRESS] "):]
            vertex_log('info', f"处理EXPRESS模型: {original_model_id}, 基础名称: {base_model_without_prefix}")
        
        # 只有非EXPRESS模型才考虑添加PAY_PREFIX
        if not is_express_model and has_sa_creds and EXPERIMENTAL_MARKER not in original_model_id:
            # 只要有SA凭证就应该显示PAY前缀,不管是否有Express Key
            current_display_prefix = PAY_PREFIX
            vertex_log('info', f"为非EXPRESS模型添加PAY前缀: {current_display_prefix}{original_model_id}")
        
        # 构建显示ID,如果是EXPRESS模型,确保保留[EXPRESS]前缀
        base_display_id = original_model_id if is_express_model else f"{current_display_prefix}{original_model_id}"
        
        vertex_log('info', f"添加模型到列表: {base_display_id}")
        dynamic_models_data.append({
            "id": base_display_id, "object": "model", "created": current_time, "owned_by": "google",
            "permission": [], "root": base_model_without_prefix, "parent": None
        })
        
        # Conditionally add common variations (standard suffixes)
        if not base_model_without_prefix.startswith("gemini-2.0"): # Suffix rules based on original_model_id
            standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
            for suffix in standard_suffixes:
                # Suffix is applied to the original model ID part (without EXPRESS prefix)
                suffixed_model_part = f"{base_model_without_prefix}{suffix}"
                # Then the whole thing is prefixed - retain EXPRESS prefix if it was there
                final_suffixed_display_id = ""
                if is_express_model:
                    final_suffixed_display_id = f"[EXPRESS] {suffixed_model_part}"
                else:
                    # 非EXPRESS模型的后缀版本也需要正确显示PAY前缀
                    final_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
                
                # Check if this suffixed ID is already in all_model_ids (unlikely with prefix) or already added
                if final_suffixed_display_id not in all_model_ids and not any(m['id'] == final_suffixed_display_id for m in dynamic_models_data):
                    vertex_log('info', f"添加后缀模型到列表: {final_suffixed_display_id}")
                    dynamic_models_data.append({
                        "id": final_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
                        "permission": [], "root": base_model_without_prefix, "parent": None
                    })
        
        # Apply special suffixes for models starting with "gemini-2.5-flash"
        if base_model_without_prefix.startswith("gemini-2.5-flash"): # Suffix rules based on original_model_id
            special_flash_suffixes = ["-nothinking", "-max"]
            for special_suffix in special_flash_suffixes:
                suffixed_model_part = f"{base_model_without_prefix}{special_suffix}"
                # Retain EXPRESS prefix if original model had it
                if is_express_model:
                    final_special_suffixed_display_id = f"[EXPRESS] {suffixed_model_part}"
                else:
                    # 非EXPRESS模型的特殊后缀版本也需要正确显示PAY前缀
                    final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"

                if final_special_suffixed_display_id not in all_model_ids and not any(m['id'] == final_special_suffixed_display_id for m in dynamic_models_data):
                    vertex_log('info', f"添加特殊后缀模型到列表: {final_special_suffixed_display_id}")
                    dynamic_models_data.append({
                        "id": final_special_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
                        "permission": [], "root": base_model_without_prefix, "parent": None
                    })

        # Apply special suffixes for gemini-2.5-pro-preview-06-05
        if base_model_without_prefix == "gemini-2.5-pro-preview-06-05":
            special_pro_suffixes = ["-nothinking", "-max"]
            for special_suffix in special_pro_suffixes:
                suffixed_model_part = f"{base_model_without_prefix}{special_suffix}"
                # Retain EXPRESS prefix if original model had it
                if is_express_model:
                    final_special_suffixed_display_id = f"[EXPRESS] {suffixed_model_part}"
                else:
                    # 非EXPRESS模型的特殊后缀版本也需要正确显示PAY前缀
                    final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"

                if final_special_suffixed_display_id not in all_model_ids and not any(m['id'] == final_special_suffixed_display_id for m in dynamic_models_data):
                    vertex_log('info', f"添加gemini-2.5-pro-preview-06-05特殊后缀模型到列表: {final_special_suffixed_display_id}")
                    dynamic_models_data.append({
                        "id": final_special_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
                        "permission": [], "root": base_model_without_prefix, "parent": None
                    })

        # Ensure uniqueness again after adding suffixes
        # Add OpenAI direct variations if SA creds are available
        if has_sa_creds: # OpenAI direct mode only works with SA credentials
            # `all_model_ids` contains the comprehensive list of base models that are eligible based on current credentials
            # We iterate through this to determine which ones get an -openai variation.
            # `raw_vertex_models` is used here to ensure we only add -openai suffix to models that are
            # fundamentally Vertex models, not just any model that might appear in `all_model_ids` (e.g. from Express list exclusively)
            # if express only key is provided.
            # We iterate through the base models from the main Vertex list.
            for base_model_id_for_openai in raw_vertex_models: # Iterate through original list of GAIA/Vertex base models
                display_model_id = ""
                if EXPERIMENTAL_MARKER in base_model_id_for_openai:
                    display_model_id = f"{base_model_id_for_openai}{OPENAI_DIRECT_SUFFIX}"
                else:
                    # OpenAI直接模式下也应该保持PAY前缀
                    display_model_id = f"{PAY_PREFIX}{base_model_id_for_openai}{OPENAI_DIRECT_SUFFIX}"
                
                # Check if already added (e.g. if remote config somehow already listed it or added as a base model)
                if display_model_id and not any(m['id'] == display_model_id for m in dynamic_models_data):
                    dynamic_models_data.append({
                        "id": display_model_id, "object": "model", "created": current_time, "owned_by": "google",
                        "permission": [], "root": base_model_id_for_openai, "parent": None
                    })
    
    return {"object": "list", "data": sorted(dynamic_models_data, key=lambda x: x['id'])}