pythonprincess committed on
Commit
f256d02
·
verified ·
1 Parent(s): 50cdb50

Delete gemma_utils.py

Browse files
Files changed (1) hide show
  1. gemma_utils.py +0 -244
gemma_utils.py DELETED
@@ -1,244 +0,0 @@
1
# models/gemma/gemma_utils.py

"""
Gemma Model Utilities for PENNY Project
Handles text generation using the Gemma-based core language model pipeline.
Provides async generation with structured error handling and logging.
"""

import asyncio
import time
from typing import Dict, Any, Optional

# --- Logging Imports ---
from app.logging_utils import log_interaction, sanitize_for_logging

# --- Model Loader Import ---
# Import is guarded so this module can still be imported (in degraded mode)
# when the loader package is absent; availability is tracked in a flag.
try:
    from app.model_loader import load_model_pipeline
    MODEL_LOADER_AVAILABLE = True
except ImportError:
    MODEL_LOADER_AVAILABLE = False
    # logging is imported lazily here because the rest of the module logs
    # through app.logging_utils; this path only needs a one-off warning.
    import logging
    logging.getLogger(__name__).warning("Could not import load_model_pipeline. Gemma service unavailable.")

# Global variable to store the loaded pipeline for re-use.
# None until _initialize_gemma_pipeline() succeeds.
GEMMA_PIPELINE: Optional[Any] = None
# Model identifier passed to load_model_pipeline().
AGENT_NAME = "penny-core-agent"
# Guards against repeated load attempts (set on first call, success or not).
INITIALIZATION_ATTEMPTED = False
30
-
31
def _initialize_gemma_pipeline() -> bool:
    """
    Load the Gemma pipeline exactly once and cache it module-wide.

    Repeat calls never retry a failed load; they simply report whether the
    cached pipeline exists.

    Returns:
        bool: True if a pipeline is loaded and ready, False otherwise.
    """
    global GEMMA_PIPELINE, INITIALIZATION_ATTEMPTED

    # After the first attempt, just report the cached outcome.
    if INITIALIZATION_ATTEMPTED:
        return GEMMA_PIPELINE is not None
    INITIALIZATION_ATTEMPTED = True

    # Without the loader module there is nothing to load.
    if not MODEL_LOADER_AVAILABLE:
        log_interaction(
            intent="gemma_initialization",
            success=False,
            error="model_loader unavailable"
        )
        return False

    try:
        log_interaction(
            intent="gemma_initialization",
            success=None,
            details=f"Loading {AGENT_NAME}"
        )

        pipeline = load_model_pipeline(AGENT_NAME)
        GEMMA_PIPELINE = pipeline

        # A None pipeline counts as a failed load, not an exception.
        if pipeline is None:
            log_interaction(
                intent="gemma_initialization",
                success=False,
                error="Pipeline returned None"
            )
            return False

        log_interaction(
            intent="gemma_initialization",
            success=True,
            details=f"Model {AGENT_NAME} loaded successfully"
        )
        return True

    except Exception as exc:
        log_interaction(
            intent="gemma_initialization",
            success=False,
            error=str(exc)
        )
        return False
84
-
85
-
86
# Eagerly load the pipeline when the module is first imported so the first
# request does not pay the model-load latency.
_initialize_gemma_pipeline()
88
-
89
-
90
def is_gemma_available() -> bool:
    """
    Report whether the Gemma pipeline is loaded and ready for inference.

    Returns:
        bool: True when a pipeline instance is cached, False otherwise.
    """
    return GEMMA_PIPELINE is not None
98
-
99
-
100
async def generate_response(
    prompt: str,
    max_new_tokens: int = 256,
    temperature: float = 0.7,
    tenant_id: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Runs text generation using the loaded Gemma pipeline.

    The blocking pipeline call is offloaded to the default thread executor
    so the event loop stays responsive during inference. Never raises for
    ordinary failures (unavailable model, bad input, inference error) —
    those are reported in the returned dict; only cancellation propagates.

    Args:
        prompt: The conversational or instruction prompt.
        max_new_tokens: The maximum number of tokens to generate (default: 256).
        temperature: Controls randomness in generation (default: 0.7).
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
            - response (str): The generated text
            - available (bool): Whether the service was available
            - error (str, optional): Error message if generation failed
            - response_time_ms (int, optional): Generation time in milliseconds

    Raises:
        asyncio.CancelledError: Re-raised so task cancellation propagates.
    """
    start_time = time.time()

    global GEMMA_PIPELINE

    # Check availability
    if not is_gemma_available():
        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error="Gemma pipeline not available",
            fallback_used=True
        )
        return {
            "response": "I'm having trouble accessing my language model right now. Please try again in a moment!",
            "available": False,
            "error": "Pipeline not initialized"
        }

    # Validate inputs (empty string or non-string prompt is rejected)
    if not prompt or not isinstance(prompt, str):
        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error="Invalid prompt provided"
        )
        return {
            "response": "I didn't receive a valid prompt. Could you try again?",
            "available": True,
            "error": "Invalid input"
        }

    # Configure generation parameters.
    # do_sample only when temperature is positive; greedy decoding otherwise.
    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "do_sample": temperature > 0.0,
        "return_full_text": False
    }

    try:
        # get_running_loop() is the correct call inside a coroutine;
        # get_event_loop() is deprecated for this use since Python 3.10.
        loop = asyncio.get_running_loop()

        # Run model inference in thread executor
        results = await loop.run_in_executor(
            None,
            lambda: GEMMA_PIPELINE(prompt, **gen_kwargs)
        )

        response_time_ms = int((time.time() - start_time) * 1000)

        # Parse results: expect a non-empty list whose first element is a
        # dict carrying 'generated_text' (transformers pipeline convention).
        if (
            isinstance(results, list)
            and results
            and isinstance(results[0], dict)
            and 'generated_text' in results[0]
        ):
            generated_text = results[0]['generated_text'].strip()

            # Emit an extra log record for slow responses (> 5s)
            if response_time_ms > 5000:
                log_interaction(
                    intent="gemma_generate_slow",
                    tenant_id=tenant_id,
                    success=True,
                    response_time_ms=response_time_ms,
                    details="Slow generation detected"
                )

            log_interaction(
                intent="gemma_generate",
                tenant_id=tenant_id,
                success=True,
                response_time_ms=response_time_ms,
                prompt_preview=sanitize_for_logging(prompt[:100])
            )

            return {
                "response": generated_text,
                "available": True,
                "response_time_ms": response_time_ms
            }

        # Unexpected output format
        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error="Unexpected model output format",
            response_time_ms=response_time_ms
        )

        return {
            "response": "I got an unexpected response from my language model. Let me try to help you another way!",
            "available": True,
            "error": "Unexpected output format"
        }

    except asyncio.CancelledError:
        # Log and re-raise so cancellation still propagates to the caller.
        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error="Generation cancelled"
        )
        raise

    except Exception as e:
        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            fallback_used=True
        )

        return {
            "response": "I'm having trouble generating a response right now. Please try again!",
            "available": False,
            "error": str(e),
            "response_time_ms": response_time_ms
        }