AION Protocol Development committed on
Commit 67cec83 · 1 Parent(s): b883a41

fix: Gemma 2 9B max_tokens limit (8192, not 32000)


- Changed Groq provider to use min(8192, context_window)
- Gemma 2 9B: context_window=8192 → max_tokens=8192
- Llama models: context_window=128K+ → max_tokens=8192 (capped)
- Updated comments for clarity
- Fixes error: 'max_tokens must be less than or equal to 8192'

Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -179,7 +179,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
             {"role": "user", "content": prompt}
         ],
         temperature=temperature,
-        max_tokens=32000  # Groq limit (kept at 32K)
+        max_tokens=min(8192, config.get("context_window", 8192))  # Use model-specific limit (Gemma2=8192, Llama=32K)
     )
     generated_code = response.choices[0].message.content
     input_tokens = response.usage.prompt_tokens
@@ -191,7 +191,7 @@ def generate_code_with_model(prompt: str, model_name: str, temperature: float =
     model = genai.GenerativeModel(config["model"])
     response = model.generate_content(
         f"{SYSTEM_PROMPT}\n\nUser request: {prompt}",
-        generation_config={"temperature": temperature, "max_output_tokens": 32000}  # Gemini 2.0 Flash supports up to 8K (65536 is max for SDK)
+        generation_config={"temperature": temperature, "max_output_tokens": 32000}  # Gemini 2.0 Flash: 1M context, using 32K for demo
     )
     generated_code = response.text
     input_tokens = response.usage_metadata.prompt_token_count
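
For illustration, a minimal sketch of what the new Groq cap computes for the two model families named in the commit message. Only the expression min(8192, config.get("context_window", 8192)) comes from the diff; the MODEL_CONFIGS dict, its keys, and the model names below are assumptions made for this example and may not match the repo's actual structure.

    # Hypothetical per-model config; app.py's real structure may differ.
    MODEL_CONFIGS = {
        "gemma2-9b-it": {"context_window": 8192},                # Gemma 2 9B
        "llama-3.1-70b-versatile": {"context_window": 131072},   # a 128K-context Llama model
    }

    def groq_max_tokens(config: dict) -> int:
        # Mirrors the committed expression: never request more than 8192
        # completion tokens, and never more than the model's context window.
        return min(8192, config.get("context_window", 8192))

    for name, cfg in MODEL_CONFIGS.items():
        print(f"{name}: max_tokens={groq_max_tokens(cfg)}")

    # Expected output:
    #   gemma2-9b-it: max_tokens=8192
    #   llama-3.1-70b-versatile: max_tokens=8192

Both families end up at 8192, which is why the Llama models are described as "capped" even though their context window is far larger; the previous hard-coded 32000 is what triggered the 'max_tokens must be less than or equal to 8192' error on Gemma 2 9B.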