AION Protocol Development
committed on
Commit 67cec83
1 Parent(s): b883a41
fix: Gemma 2 9B max_tokens limit (8192, not 32000)
- Changed Groq provider to use min(8192, context_window) (see the sketch after this list)
- Gemma 2 9B: context_window=8192 → max_tokens=8192
- Llama models: context_window=128K+ → max_tokens=8192 (capped)
- Updated comments for clarity
- Fixes error: 'max_tokens must be less than or equal to 8192'
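
The clamp described in the first bullet can be illustrated with a minimal, self-contained sketch. MODEL_CONFIGS and resolve_max_tokens are hypothetical names used only for illustration (the diff below only shows config.get("context_window", ...)); the model IDs and the 128K Llama window are assumptions based on the commit message.

# Hypothetical sketch of the fix: clamp max_tokens to Groq's 8192 ceiling per model.
MODEL_CONFIGS = {
    "gemma2-9b-it": {"context_window": 8192},               # Gemma 2 9B: window equals the ceiling
    "llama-3.1-70b-versatile": {"context_window": 131072},  # Llama: 128K window, capped below
}

def resolve_max_tokens(config: dict) -> int:
    # Groq rejected max_tokens > 8192 ("max_tokens must be less than or equal to 8192"),
    # so every model is capped at 8192 regardless of its context window.
    return min(8192, config.get("context_window", 8192))

assert resolve_max_tokens(MODEL_CONFIGS["gemma2-9b-it"]) == 8192
assert resolve_max_tokens(MODEL_CONFIGS["llama-3.1-70b-versatile"]) == 8192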
app.py (CHANGED)
@@ -179,7 +179,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
                 {"role": "user", "content": prompt}
             ],
             temperature=temperature,
-            max_tokens=
+            max_tokens=min(8192, config.get("context_window", 8192))  # Use model-specific limit (Gemma2=8192, Llama=32K)
         )
         generated_code = response.choices[0].message.content
         input_tokens = response.usage.prompt_tokens
@@ -191,7 +191,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
         model = genai.GenerativeModel(config["model"])
         response = model.generate_content(
             f"{SYSTEM_PROMPT}\n\nUser request: {prompt}",
-            generation_config={"temperature": temperature, "max_output_tokens": 32000}  # Gemini 2.0 Flash
+            generation_config={"temperature": temperature, "max_output_tokens": 32000}  # Gemini 2.0 Flash: 1M context, using 32K for demo
         )
         generated_code = response.text
         input_tokens = response.usage_metadata.prompt_token_count
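
For context, a hedged sketch of how the patched Groq call would be exercised end to end. The client setup, SYSTEM_PROMPT text, and the illustrative config entry are assumptions (the surrounding code in app.py is not shown in this diff); the response handling mirrors the context lines above.

from groq import Groq

client = Groq()  # assumes GROQ_API_KEY is set in the environment

SYSTEM_PROMPT = "You are a code generator."                   # placeholder; the real prompt lives in app.py
config = {"model": "gemma2-9b-it", "context_window": 8192}    # illustrative model entry

response = client.chat.completions.create(
    model=config["model"],
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": "Write a function that reverses a string."},
    ],
    temperature=0.2,
    max_tokens=min(8192, config.get("context_window", 8192)),  # stays within Groq's 8192 limit
)

generated_code = response.choices[0].message.content
input_tokens = response.usage.prompt_tokens
print(input_tokens, generated_code)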