timeki committed on
Commit
87bf2a1
·
1 Parent(s): bcad597

change max tokens for answers

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. climateqa/engine/llm/azure.py +15 -5
app.py CHANGED
@@ -72,7 +72,7 @@ vectorstore_graphs = get_azure_search_vectorstore(embeddings=embeddings_function
72
  vectorstore_region = get_azure_search_vectorstore(embeddings=embeddings_function, index_name="climateqa-v2")
73
 
74
 
75
- llm = get_llm(provider="azure", max_tokens=1024, temperature=0.0, streaming=True)
76
  if os.environ["GRADIO_ENV"] == "local":
77
  reranker = get_reranker("nano")
78
  else:
 
72
  vectorstore_region = get_azure_search_vectorstore(embeddings=embeddings_function, index_name="climateqa-v2")
73
 
74
 
75
+ llm = get_llm(provider="azure", max_tokens=3000, temperature=0.0, streaming=True)
76
  if os.environ["GRADIO_ENV"] == "local":
77
  reranker = get_reranker("nano")
78
  else:
climateqa/engine/llm/azure.py CHANGED
@@ -109,9 +109,19 @@ def get_llm(
109
  model_kwargs = kwargs.pop("model_kwargs", {})
110
  model_kwargs["max_completion_tokens"] = max_tokens
111
 
112
- # GPT-5/o1 models only support temperature=1, so we force it to 1
113
- # (ignoring any temperature value passed by the caller)
114
- fixed_temperature = 1.0
 
 
 
 
 
 
 
 
 
 
115
 
116
  # Use Service Principal authentication if credentials are available
117
  if all([client_id, client_credential, tenant_id]):
@@ -121,7 +131,7 @@ def get_llm(
121
  azure_deployment=deployment_name,
122
  api_version=AZURE_OPENAI_API_VERSION,
123
  azure_ad_token_provider=_get_azure_ad_token,
124
- temperature=fixed_temperature,
125
  verbose=verbose,
126
  streaming=streaming,
127
  timeout=timeout,
@@ -142,7 +152,7 @@ def get_llm(
142
  azure_deployment=deployment_name,
143
  api_version=AZURE_OPENAI_API_VERSION,
144
  api_key=api_key,
145
- temperature=fixed_temperature,
146
  verbose=verbose,
147
  streaming=streaming,
148
  timeout=timeout,
 
109
  model_kwargs = kwargs.pop("model_kwargs", {})
110
  model_kwargs["max_completion_tokens"] = max_tokens
111
 
112
+ # Some models only support temperature=1:
113
+ # - o1 reasoning models (o1, o1-mini, o1-preview)
114
+ # - GPT-5 models (gpt-5, gpt-5-mini, etc.)
115
+ deployment_lower = deployment_name.lower()
116
+ is_o1_model = "o1" in deployment_lower and "gpt" not in deployment_lower
117
+ is_gpt5_model = "gpt-5" in deployment_lower or "gpt5" in deployment_lower
118
+ requires_fixed_temperature = is_o1_model or is_gpt5_model
119
+
120
+ if requires_fixed_temperature:
121
+ print(f"Using fixed temperature=1.0 for model: {deployment_name} (model only supports temperature=1)")
122
+ effective_temperature = 1.0
123
+ else:
124
+ effective_temperature = temperature
125
 
126
  # Use Service Principal authentication if credentials are available
127
  if all([client_id, client_credential, tenant_id]):
 
131
  azure_deployment=deployment_name,
132
  api_version=AZURE_OPENAI_API_VERSION,
133
  azure_ad_token_provider=_get_azure_ad_token,
134
+ temperature=effective_temperature,
135
  verbose=verbose,
136
  streaming=streaming,
137
  timeout=timeout,
 
152
  azure_deployment=deployment_name,
153
  api_version=AZURE_OPENAI_API_VERSION,
154
  api_key=api_key,
155
+ temperature=effective_temperature,
156
  verbose=verbose,
157
  streaming=streaming,
158
  timeout=timeout,