Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
ed97f9d
1
Parent(s):
2cac7a0
added new dspy
Browse files- app.py +1 -1
- scripts/format_response.py +1 -1
- src/managers/session_manager.py +7 -3
- src/routes/session_routes.py +8 -5
- src/utils/model_registry.py +21 -18
app.py
CHANGED
@@ -299,7 +299,7 @@ DEFAULT_MODEL_CONFIG = {
|
|
299 |
|
300 |
"api_key": os.getenv("OPENAI_API_KEY"),
|
301 |
|
302 |
-
"temperature": float(os.getenv("TEMPERATURE", 1.0)),
|
303 |
|
304 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000)), "cache": False
|
305 |
|
|
|
299 |
|
300 |
"api_key": os.getenv("OPENAI_API_KEY"),
|
301 |
|
302 |
+
"temperature": min(1.0, max(0.0, float(os.getenv("TEMPERATURE", "1.0")))), # Clamp to 0..1
|
303 |
|
304 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000)), "cache": False
|
305 |
|
scripts/format_response.py
CHANGED
@@ -44,7 +44,7 @@ API_KEY_PATTERNS = [
|
|
44 |
]
|
45 |
|
46 |
# Network request patterns
|
47 |
-
NETWORK_REQUEST_PATTERNS = re.compile(r"(requests\.|urllib\.|http
|
48 |
|
49 |
# DataFrame creation with hardcoded data - block only this specific pattern
|
50 |
|
|
|
44 |
]
|
45 |
|
46 |
# Network request patterns
|
47 |
+
NETWORK_REQUEST_PATTERNS = re.compile(r"(requests\.|urllib\.|http\.client|httpx\.|socket\.connect\()")
|
48 |
|
49 |
# DataFrame creation with hardcoded data - block only this specific pattern
|
50 |
|
src/managers/session_manager.py
CHANGED
@@ -25,6 +25,10 @@ load_dotenv()
|
|
25 |
# Initialize logger
|
26 |
logger = Logger("session_manager", see_time=False, console_log=False)
|
27 |
|
|
|
|
|
|
|
|
|
28 |
class SessionManager:
|
29 |
"""
|
30 |
Manages session-specific state, including datasets, retrievers, and AI systems.
|
@@ -135,7 +139,7 @@ This dataset appears clean with consistent formatting and no missing values, mak
|
|
135 |
"provider": os.getenv("MODEL_PROVIDER", "anthropic"),
|
136 |
"model": os.getenv("MODEL_NAME", "claude-3-5-sonnet-latest"),
|
137 |
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
138 |
-
"temperature":
|
139 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000))
|
140 |
}
|
141 |
|
@@ -199,7 +203,7 @@ This dataset appears clean with consistent formatting and no missing values, mak
|
|
199 |
"provider": os.getenv("MODEL_PROVIDER", "anthropic"),
|
200 |
"model": os.getenv("MODEL_NAME", "claude-3-5-sonnet-latest"),
|
201 |
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
202 |
-
"temperature":
|
203 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000))
|
204 |
}
|
205 |
|
@@ -273,7 +277,7 @@ This dataset appears clean with consistent formatting and no missing values, mak
|
|
273 |
"provider": os.getenv("MODEL_PROVIDER", "anthropic"),
|
274 |
"model": os.getenv("MODEL_NAME", "claude-3-5-sonnet-latest"),
|
275 |
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
276 |
-
"temperature":
|
277 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000))
|
278 |
}
|
279 |
|
|
|
25 |
# Initialize logger
|
26 |
logger = Logger("session_manager", see_time=False, console_log=False)
|
27 |
|
28 |
+
# Helper to clamp temperature to valid range
|
29 |
+
def _get_clamped_temperature():
|
30 |
+
return min(1.0, max(0.0, float(os.getenv("TEMPERATURE", "1.0"))))
|
31 |
+
|
32 |
class SessionManager:
|
33 |
"""
|
34 |
Manages session-specific state, including datasets, retrievers, and AI systems.
|
|
|
139 |
"provider": os.getenv("MODEL_PROVIDER", "anthropic"),
|
140 |
"model": os.getenv("MODEL_NAME", "claude-3-5-sonnet-latest"),
|
141 |
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
142 |
+
"temperature": _get_clamped_temperature(),
|
143 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000))
|
144 |
}
|
145 |
|
|
|
203 |
"provider": os.getenv("MODEL_PROVIDER", "anthropic"),
|
204 |
"model": os.getenv("MODEL_NAME", "claude-3-5-sonnet-latest"),
|
205 |
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
206 |
+
"temperature": _get_clamped_temperature(),
|
207 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000))
|
208 |
}
|
209 |
|
|
|
277 |
"provider": os.getenv("MODEL_PROVIDER", "anthropic"),
|
278 |
"model": os.getenv("MODEL_NAME", "claude-3-5-sonnet-latest"),
|
279 |
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
280 |
+
"temperature": _get_clamped_temperature(),
|
281 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000))
|
282 |
}
|
283 |
|
src/routes/session_routes.py
CHANGED
@@ -384,23 +384,26 @@ async def update_model_settings(
|
|
384 |
# Get session state to update model config
|
385 |
session_state = app_state.get_session_state(session_id)
|
386 |
|
|
|
|
|
|
|
387 |
# Create the model config
|
388 |
if 'gpt-5' in str(settings.model):
|
389 |
model_config = {
|
390 |
"provider": settings.provider,
|
391 |
"model": settings.model,
|
392 |
"api_key": settings.api_key,
|
393 |
-
"temperature":
|
394 |
"max_tokens":16_000
|
395 |
# "max_completion_tokens": 2500
|
396 |
}
|
397 |
-
elif 'o1
|
398 |
model_config = {
|
399 |
"provider": settings.provider,
|
400 |
"model": settings.model,
|
401 |
"api_key": settings.api_key,
|
402 |
-
"temperature": 1,
|
403 |
-
"max_tokens":
|
404 |
}
|
405 |
|
406 |
|
@@ -409,7 +412,7 @@ async def update_model_settings(
|
|
409 |
"provider": settings.provider,
|
410 |
"model": settings.model,
|
411 |
"api_key": settings.api_key,
|
412 |
-
"temperature":
|
413 |
"max_tokens": settings.max_tokens
|
414 |
}
|
415 |
|
|
|
384 |
# Get session state to update model config
|
385 |
session_state = app_state.get_session_state(session_id)
|
386 |
|
387 |
+
# Clamp temperature to valid range (0..1 for all providers)
|
388 |
+
clamped_temp = min(1.0, max(0.0, float(settings.temperature)))
|
389 |
+
|
390 |
# Create the model config
|
391 |
if 'gpt-5' in str(settings.model):
|
392 |
model_config = {
|
393 |
"provider": settings.provider,
|
394 |
"model": settings.model,
|
395 |
"api_key": settings.api_key,
|
396 |
+
"temperature": 1,
|
397 |
"max_tokens":16_000
|
398 |
# "max_completion_tokens": 2500
|
399 |
}
|
400 |
+
elif 'o1' in str(settings.model) or 'o3' in str(settings.model):
|
401 |
model_config = {
|
402 |
"provider": settings.provider,
|
403 |
"model": settings.model,
|
404 |
"api_key": settings.api_key,
|
405 |
+
"temperature": 1.0, # O-series only supports 1
|
406 |
+
"max_tokens":20_000
|
407 |
}
|
408 |
|
409 |
|
|
|
412 |
"provider": settings.provider,
|
413 |
"model": settings.model,
|
414 |
"api_key": settings.api_key,
|
415 |
+
"temperature": clamped_temp,
|
416 |
"max_tokens": settings.max_tokens
|
417 |
}
|
418 |
|
src/utils/model_registry.py
CHANGED
@@ -10,6 +10,9 @@ PROVIDERS = {
|
|
10 |
}
|
11 |
max_tokens = int(os.getenv("MAX_TOKENS", 6000))
|
12 |
|
|
|
|
|
|
|
13 |
small_lm = dspy.LM('openai/gpt-4o-mini',max_tokens=300,api_key=os.getenv("OPENAI_API_KEY"), cache=False)
|
14 |
|
15 |
mid_lm = dspy.LM('openai/gpt-4o-mini',max_tokens=1300,api_key=os.getenv("OPENAI_API_KEY"), cache=False)
|
@@ -22,7 +25,7 @@ gpt_4o_mini = dspy.LM('openai/gpt-4o-mini',max_tokens=4000,api_key=os.getenv("OP
|
|
22 |
gpt_5_mini = dspy.LM(
|
23 |
model="openai/gpt-5-mini",
|
24 |
api_key=os.getenv("OPENAI_API_KEY"),
|
25 |
-
temperature=
|
26 |
max_tokens= 16_000,
|
27 |
# max_completion_tokens=max_tokens,
|
28 |
cache=False
|
@@ -31,7 +34,7 @@ gpt_5_mini = dspy.LM(
|
|
31 |
gpt_5 = dspy.LM(
|
32 |
model="openai/gpt-5",
|
33 |
api_key=os.getenv("OPENAI_API_KEY"),
|
34 |
-
temperature=
|
35 |
max_tokens= 16_000,
|
36 |
# max_completion_tokens=max_tokens, # Use max_completion_tokens for gpt-5
|
37 |
cache=False
|
@@ -40,7 +43,7 @@ gpt_5 = dspy.LM(
|
|
40 |
gpt_5_nano = dspy.LM(
|
41 |
model="openai/gpt-5-nano",
|
42 |
api_key=os.getenv("OPENAI_API_KEY"),
|
43 |
-
temperature=
|
44 |
max_tokens= 16_000,
|
45 |
# max_completion_tokens=max_tokens,
|
46 |
cache=False
|
@@ -73,7 +76,7 @@ o1_mini = dspy.LM(
|
|
73 |
o3 = dspy.LM(
|
74 |
model="openai/o3-2025-04-16",
|
75 |
api_key=os.getenv("OPENAI_API_KEY"),
|
76 |
-
temperature=
|
77 |
max_tokens=20_000,
|
78 |
cache=False
|
79 |
)
|
@@ -81,7 +84,7 @@ o3 = dspy.LM(
|
|
81 |
o3_mini = dspy.LM(
|
82 |
model="openai/o3-mini",
|
83 |
api_key=os.getenv("OPENAI_API_KEY"),
|
84 |
-
temperature=
|
85 |
max_tokens=20_000,
|
86 |
cache=False
|
87 |
)
|
@@ -89,16 +92,16 @@ o3_mini = dspy.LM(
|
|
89 |
claude_4_5_sonnet_latest = dspy.LM(
|
90 |
model="anthropic/claude-sonnet-4-5-20250929",
|
91 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
92 |
-
temperature=
|
93 |
max_tokens=max_tokens,
|
94 |
cache=False
|
95 |
-
)
|
96 |
|
97 |
# Anthropic models
|
98 |
claude_3_5_haiku_latest = dspy.LM(
|
99 |
model="anthropic/claude-3-5-haiku-latest",
|
100 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
101 |
-
temperature=
|
102 |
max_tokens=max_tokens,
|
103 |
cache=False
|
104 |
)
|
@@ -106,7 +109,7 @@ claude_3_5_haiku_latest = dspy.LM(
|
|
106 |
claude_3_7_sonnet_latest = dspy.LM(
|
107 |
model="anthropic/claude-3-7-sonnet-latest",
|
108 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
109 |
-
temperature=
|
110 |
max_tokens=max_tokens,
|
111 |
cache=False
|
112 |
)
|
@@ -114,7 +117,7 @@ claude_3_7_sonnet_latest = dspy.LM(
|
|
114 |
claude_3_5_sonnet_latest = dspy.LM(
|
115 |
model="anthropic/claude-3-5-sonnet-latest",
|
116 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
117 |
-
temperature=
|
118 |
max_tokens=max_tokens,
|
119 |
cache=False
|
120 |
)
|
@@ -122,7 +125,7 @@ claude_3_5_sonnet_latest = dspy.LM(
|
|
122 |
claude_sonnet_4_20250514 = dspy.LM(
|
123 |
model="anthropic/claude-sonnet-4-20250514",
|
124 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
125 |
-
temperature=
|
126 |
max_tokens=max_tokens,
|
127 |
cache=False
|
128 |
)
|
@@ -130,7 +133,7 @@ claude_sonnet_4_20250514 = dspy.LM(
|
|
130 |
claude_3_opus_latest = dspy.LM(
|
131 |
model="anthropic/claude-3-opus-latest",
|
132 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
133 |
-
temperature=
|
134 |
max_tokens=max_tokens,
|
135 |
cache=False
|
136 |
)
|
@@ -138,7 +141,7 @@ claude_3_opus_latest = dspy.LM(
|
|
138 |
claude_opus_4_20250514 = dspy.LM(
|
139 |
model="anthropic/claude-opus-4-20250514",
|
140 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
141 |
-
temperature=
|
142 |
max_tokens=max_tokens,
|
143 |
cache=False
|
144 |
)
|
@@ -146,7 +149,7 @@ claude_opus_4_20250514 = dspy.LM(
|
|
146 |
claude_opus_4_1 = dspy.LM(
|
147 |
model="anthropic/claude-opus-4-1",
|
148 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
149 |
-
temperature=
|
150 |
max_tokens=max_tokens,
|
151 |
cache=False
|
152 |
)
|
@@ -155,7 +158,7 @@ claude_opus_4_1 = dspy.LM(
|
|
155 |
deepseek_r1_distill_llama_70b = dspy.LM(
|
156 |
model="groq/deepseek-r1-distill-llama-70b",
|
157 |
api_key=os.getenv("GROQ_API_KEY"),
|
158 |
-
temperature=
|
159 |
max_tokens=max_tokens,
|
160 |
cache=False
|
161 |
)
|
@@ -163,7 +166,7 @@ deepseek_r1_distill_llama_70b = dspy.LM(
|
|
163 |
gpt_oss_120B = dspy.LM(
|
164 |
model="groq/gpt-oss-120B",
|
165 |
api_key=os.getenv("GROQ_API_KEY"),
|
166 |
-
temperature=
|
167 |
max_tokens=max_tokens,
|
168 |
cache=False
|
169 |
)
|
@@ -171,7 +174,7 @@ gpt_oss_120B = dspy.LM(
|
|
171 |
gpt_oss_20B = dspy.LM(
|
172 |
model="groq/gpt-oss-20B",
|
173 |
api_key=os.getenv("GROQ_API_KEY"),
|
174 |
-
temperature=
|
175 |
max_tokens=max_tokens,
|
176 |
cache=False
|
177 |
)
|
@@ -179,7 +182,7 @@ gpt_oss_20B = dspy.LM(
|
|
179 |
gemini_2_5_pro_preview_03_25 = dspy.LM(
|
180 |
model="gemini/gemini-2.5-pro-preview-03-25",
|
181 |
api_key=os.getenv("GEMINI_API_KEY"),
|
182 |
-
temperature=
|
183 |
max_tokens=max_tokens,
|
184 |
cache=False
|
185 |
)
|
|
|
10 |
}
|
11 |
max_tokens = int(os.getenv("MAX_TOKENS", 6000))
|
12 |
|
13 |
+
# Clamp temperature to valid range (0..1) for all models
|
14 |
+
default_temperature = min(1.0, max(0.0, float(os.getenv("TEMPERATURE", "1.0"))))
|
15 |
+
|
16 |
small_lm = dspy.LM('openai/gpt-4o-mini',max_tokens=300,api_key=os.getenv("OPENAI_API_KEY"), cache=False)
|
17 |
|
18 |
mid_lm = dspy.LM('openai/gpt-4o-mini',max_tokens=1300,api_key=os.getenv("OPENAI_API_KEY"), cache=False)
|
|
|
25 |
gpt_5_mini = dspy.LM(
|
26 |
model="openai/gpt-5-mini",
|
27 |
api_key=os.getenv("OPENAI_API_KEY"),
|
28 |
+
temperature=default_temperature,
|
29 |
max_tokens= 16_000,
|
30 |
# max_completion_tokens=max_tokens,
|
31 |
cache=False
|
|
|
34 |
gpt_5 = dspy.LM(
|
35 |
model="openai/gpt-5",
|
36 |
api_key=os.getenv("OPENAI_API_KEY"),
|
37 |
+
temperature=default_temperature,
|
38 |
max_tokens= 16_000,
|
39 |
# max_completion_tokens=max_tokens, # Use max_completion_tokens for gpt-5
|
40 |
cache=False
|
|
|
43 |
gpt_5_nano = dspy.LM(
|
44 |
model="openai/gpt-5-nano",
|
45 |
api_key=os.getenv("OPENAI_API_KEY"),
|
46 |
+
temperature=default_temperature,
|
47 |
max_tokens= 16_000,
|
48 |
# max_completion_tokens=max_tokens,
|
49 |
cache=False
|
|
|
76 |
o3 = dspy.LM(
|
77 |
model="openai/o3-2025-04-16",
|
78 |
api_key=os.getenv("OPENAI_API_KEY"),
|
79 |
+
temperature=default_temperature,
|
80 |
max_tokens=20_000,
|
81 |
cache=False
|
82 |
)
|
|
|
84 |
o3_mini = dspy.LM(
|
85 |
model="openai/o3-mini",
|
86 |
api_key=os.getenv("OPENAI_API_KEY"),
|
87 |
+
temperature=default_temperature,
|
88 |
max_tokens=20_000,
|
89 |
cache=False
|
90 |
)
|
|
|
92 |
claude_4_5_sonnet_latest = dspy.LM(
|
93 |
model="anthropic/claude-sonnet-4-5-20250929",
|
94 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
95 |
+
temperature=default_temperature,
|
96 |
max_tokens=max_tokens,
|
97 |
cache=False
|
98 |
+
)
|
99 |
|
100 |
# Anthropic models
|
101 |
claude_3_5_haiku_latest = dspy.LM(
|
102 |
model="anthropic/claude-3-5-haiku-latest",
|
103 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
104 |
+
temperature=default_temperature,
|
105 |
max_tokens=max_tokens,
|
106 |
cache=False
|
107 |
)
|
|
|
109 |
claude_3_7_sonnet_latest = dspy.LM(
|
110 |
model="anthropic/claude-3-7-sonnet-latest",
|
111 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
112 |
+
temperature=default_temperature,
|
113 |
max_tokens=max_tokens,
|
114 |
cache=False
|
115 |
)
|
|
|
117 |
claude_3_5_sonnet_latest = dspy.LM(
|
118 |
model="anthropic/claude-3-5-sonnet-latest",
|
119 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
120 |
+
temperature=default_temperature,
|
121 |
max_tokens=max_tokens,
|
122 |
cache=False
|
123 |
)
|
|
|
125 |
claude_sonnet_4_20250514 = dspy.LM(
|
126 |
model="anthropic/claude-sonnet-4-20250514",
|
127 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
128 |
+
temperature=default_temperature,
|
129 |
max_tokens=max_tokens,
|
130 |
cache=False
|
131 |
)
|
|
|
133 |
claude_3_opus_latest = dspy.LM(
|
134 |
model="anthropic/claude-3-opus-latest",
|
135 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
136 |
+
temperature=default_temperature,
|
137 |
max_tokens=max_tokens,
|
138 |
cache=False
|
139 |
)
|
|
|
141 |
claude_opus_4_20250514 = dspy.LM(
|
142 |
model="anthropic/claude-opus-4-20250514",
|
143 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
144 |
+
temperature=default_temperature,
|
145 |
max_tokens=max_tokens,
|
146 |
cache=False
|
147 |
)
|
|
|
149 |
claude_opus_4_1 = dspy.LM(
|
150 |
model="anthropic/claude-opus-4-1",
|
151 |
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
152 |
+
temperature=default_temperature,
|
153 |
max_tokens=max_tokens,
|
154 |
cache=False
|
155 |
)
|
|
|
158 |
deepseek_r1_distill_llama_70b = dspy.LM(
|
159 |
model="groq/deepseek-r1-distill-llama-70b",
|
160 |
api_key=os.getenv("GROQ_API_KEY"),
|
161 |
+
temperature=default_temperature,
|
162 |
max_tokens=max_tokens,
|
163 |
cache=False
|
164 |
)
|
|
|
166 |
gpt_oss_120B = dspy.LM(
|
167 |
model="groq/gpt-oss-120B",
|
168 |
api_key=os.getenv("GROQ_API_KEY"),
|
169 |
+
temperature=default_temperature,
|
170 |
max_tokens=max_tokens,
|
171 |
cache=False
|
172 |
)
|
|
|
174 |
gpt_oss_20B = dspy.LM(
|
175 |
model="groq/gpt-oss-20B",
|
176 |
api_key=os.getenv("GROQ_API_KEY"),
|
177 |
+
temperature=default_temperature,
|
178 |
max_tokens=max_tokens,
|
179 |
cache=False
|
180 |
)
|
|
|
182 |
gemini_2_5_pro_preview_03_25 = dspy.LM(
|
183 |
model="gemini/gemini-2.5-pro-preview-03-25",
|
184 |
api_key=os.getenv("GEMINI_API_KEY"),
|
185 |
+
temperature=default_temperature,
|
186 |
max_tokens=max_tokens,
|
187 |
cache=False
|
188 |
)
|