Spaces:
Runtime error
Runtime error
Change max tokens for answers
Browse files- app.py +1 -1
- climateqa/engine/llm/azure.py +15 -5
app.py
CHANGED
|
@@ -72,7 +72,7 @@ vectorstore_graphs = get_azure_search_vectorstore(embeddings=embeddings_function
|
|
| 72 |
vectorstore_region = get_azure_search_vectorstore(embeddings=embeddings_function, index_name="climateqa-v2")
|
| 73 |
|
| 74 |
|
| 75 |
-
llm = get_llm(provider="azure", max_tokens=
|
| 76 |
if os.environ["GRADIO_ENV"] == "local":
|
| 77 |
reranker = get_reranker("nano")
|
| 78 |
else:
|
|
|
|
| 72 |
vectorstore_region = get_azure_search_vectorstore(embeddings=embeddings_function, index_name="climateqa-v2")
|
| 73 |
|
| 74 |
|
| 75 |
+
llm = get_llm(provider="azure", max_tokens=3000, temperature=0.0, streaming=True)
|
| 76 |
if os.environ["GRADIO_ENV"] == "local":
|
| 77 |
reranker = get_reranker("nano")
|
| 78 |
else:
|
climateqa/engine/llm/azure.py
CHANGED
|
@@ -109,9 +109,19 @@ def get_llm(
|
|
| 109 |
model_kwargs = kwargs.pop("model_kwargs", {})
|
| 110 |
model_kwargs["max_completion_tokens"] = max_tokens
|
| 111 |
|
| 112 |
-
#
|
| 113 |
-
#
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
# Use Service Principal authentication if credentials are available
|
| 117 |
if all([client_id, client_credential, tenant_id]):
|
|
@@ -121,7 +131,7 @@ def get_llm(
|
|
| 121 |
azure_deployment=deployment_name,
|
| 122 |
api_version=AZURE_OPENAI_API_VERSION,
|
| 123 |
azure_ad_token_provider=_get_azure_ad_token,
|
| 124 |
-
temperature=temperature,
|
| 125 |
verbose=verbose,
|
| 126 |
streaming=streaming,
|
| 127 |
timeout=timeout,
|
|
@@ -142,7 +152,7 @@ def get_llm(
|
|
| 142 |
azure_deployment=deployment_name,
|
| 143 |
api_version=AZURE_OPENAI_API_VERSION,
|
| 144 |
api_key=api_key,
|
| 145 |
-
temperature=temperature,
|
| 146 |
verbose=verbose,
|
| 147 |
streaming=streaming,
|
| 148 |
timeout=timeout,
|
|
|
|
| 109 |
model_kwargs = kwargs.pop("model_kwargs", {})
|
| 110 |
model_kwargs["max_completion_tokens"] = max_tokens
|
| 111 |
|
| 112 |
+
# Some models only support temperature=1:
|
| 113 |
+
# - o1 reasoning models (o1, o1-mini, o1-preview)
|
| 114 |
+
# - GPT-5 models (gpt-5, gpt-5-mini, etc.)
|
| 115 |
+
deployment_lower = deployment_name.lower()
|
| 116 |
+
is_o1_model = "o1" in deployment_lower and "gpt" not in deployment_lower
|
| 117 |
+
is_gpt5_model = "gpt-5" in deployment_lower or "gpt5" in deployment_lower
|
| 118 |
+
requires_fixed_temperature = is_o1_model or is_gpt5_model
|
| 119 |
+
|
| 120 |
+
if requires_fixed_temperature:
|
| 121 |
+
print(f"Using fixed temperature=1.0 for model: {deployment_name} (model only supports temperature=1)")
|
| 122 |
+
effective_temperature = 1.0
|
| 123 |
+
else:
|
| 124 |
+
effective_temperature = temperature
|
| 125 |
|
| 126 |
# Use Service Principal authentication if credentials are available
|
| 127 |
if all([client_id, client_credential, tenant_id]):
|
|
|
|
| 131 |
azure_deployment=deployment_name,
|
| 132 |
api_version=AZURE_OPENAI_API_VERSION,
|
| 133 |
azure_ad_token_provider=_get_azure_ad_token,
|
| 134 |
+
temperature=effective_temperature,
|
| 135 |
verbose=verbose,
|
| 136 |
streaming=streaming,
|
| 137 |
timeout=timeout,
|
|
|
|
| 152 |
azure_deployment=deployment_name,
|
| 153 |
api_version=AZURE_OPENAI_API_VERSION,
|
| 154 |
api_key=api_key,
|
| 155 |
+
temperature=effective_temperature,
|
| 156 |
verbose=verbose,
|
| 157 |
streaming=streaming,
|
| 158 |
timeout=timeout,
|