notRaphael committed on
Commit
2781fa9
·
verified ·
1 Parent(s): 82f0d5d

docs: add API verification comments to gemini_client.py

Browse files
Files changed (1) hide show
  1. video_intelligence/gemini_client.py +17 -10
video_intelligence/gemini_client.py CHANGED
@@ -2,6 +2,13 @@
2
  Video Intelligence Platform — Gemini Integration
3
  Handles video captioning, text embeddings, query decomposition, and RAG generation.
4
  Uses the new google-genai SDK (NOT the deprecated google-generativeai).
 
 
 
 
 
 
 
5
  """
6
  import time
7
  import json
@@ -46,7 +53,7 @@ class GeminiClient:
46
  return response.text or ""
47
 
48
  def caption_frames_batch(self, frames_bytes: List[bytes],
49
- batch_desc: str = "") -> List[str]:
50
  """Caption multiple frames. Each call is independent."""
51
  captions = []
52
  for i, fb in enumerate(frames_bytes):
@@ -58,8 +65,8 @@ class GeminiClient:
58
  captions.append("")
59
  return captions
60
 
61
- def caption_video_segment(self, video_bytes: bytes,
62
- prompt: str = "Describe all objects and actions in this video clip.") -> str:
63
  """Caption a video segment using Gemini's native video understanding."""
64
  response = self.client.models.generate_content(
65
  model=self.vision_model,
@@ -108,7 +115,7 @@ class GeminiClient:
108
  def decompose_query(self, query: str) -> Dict:
109
  """
110
  Decompose a natural language query into sub-queries + boolean operator.
111
-
112
  Examples:
113
  "red car and yellow car" → {"sub_queries": ["red car", "yellow car"], "operator": "AND"}
114
  "people in white OR blue clothes" → {"sub_queries": ["people in white clothes", "people in blue clothes"], "operator": "OR"}
@@ -151,10 +158,10 @@ Respond ONLY with valid JSON:
151
  # ── RAG Answer Generation ───────────────────────────────────────────────
152
 
153
  def generate_rag_answer(self, query: str,
154
- retrieved_contexts: List[Dict]) -> str:
155
  """
156
  Generate a grounded answer using retrieved video segments as context.
157
-
158
  Args:
159
  query: User's original question
160
  retrieved_contexts: List of dicts with keys:
@@ -204,17 +211,17 @@ Instructions:
204
  # ── Akinator Question Generation ────────────────────────────────────────
205
 
206
  def generate_refinement_question(self, query: str,
207
- candidate_attributes: Dict[str, List[str]]) -> Dict:
208
  """
209
  Generate the next best question to narrow down results (Akinator-style).
210
-
211
  Args:
212
  query: Original user query
213
  candidate_attributes: Dict mapping attribute_name → list of unique values
214
  e.g. {"location": ["indoor", "outdoor"], "time_of_day": ["day", "night"]}
215
-
216
  Returns:
217
- {"attribute": "location", "question": "Is the scene indoor or outdoor?",
218
  "options": ["indoor", "outdoor"]}
219
  """
220
  attrs_str = json.dumps(candidate_attributes, indent=2)
 
2
  Video Intelligence Platform — Gemini Integration
3
  Handles video captioning, text embeddings, query decomposition, and RAG generation.
4
  Uses the new google-genai SDK (NOT the deprecated google-generativeai).
5
+
6
+ Verified against google-genai >= 1.0:
7
+ - Client: genai.Client(api_key=...)
8
+ - Generate: client.models.generate_content(model=..., contents=[...], config=...)
9
+ - Embed: client.models.embed_content(model=..., contents=..., config=...)
10
+ - Types: types.Part.from_bytes, types.Part.from_text, types.GenerateContentConfig,
11
+ types.EmbedContentConfig
12
  """
13
  import time
14
  import json
 
53
  return response.text or ""
54
 
55
  def caption_frames_batch(self, frames_bytes: List[bytes],
56
+ batch_desc: str = "") -> List[str]:
57
  """Caption multiple frames. Each call is independent."""
58
  captions = []
59
  for i, fb in enumerate(frames_bytes):
 
65
  captions.append("")
66
  return captions
67
 
68
+ def caption_video_segment(self, video_bytes: bytes,
69
+ prompt: str = "Describe all objects and actions in this video clip.") -> str:
70
  """Caption a video segment using Gemini's native video understanding."""
71
  response = self.client.models.generate_content(
72
  model=self.vision_model,
 
115
  def decompose_query(self, query: str) -> Dict:
116
  """
117
  Decompose a natural language query into sub-queries + boolean operator.
118
+
119
  Examples:
120
  "red car and yellow car" → {"sub_queries": ["red car", "yellow car"], "operator": "AND"}
121
  "people in white OR blue clothes" → {"sub_queries": ["people in white clothes", "people in blue clothes"], "operator": "OR"}
 
158
  # ── RAG Answer Generation ───────────────────────────────────────────────
159
 
160
  def generate_rag_answer(self, query: str,
161
+ retrieved_contexts: List[Dict]) -> str:
162
  """
163
  Generate a grounded answer using retrieved video segments as context.
164
+
165
  Args:
166
  query: User's original question
167
  retrieved_contexts: List of dicts with keys:
 
211
  # ── Akinator Question Generation ────────────────────────────────────────
212
 
213
  def generate_refinement_question(self, query: str,
214
+ candidate_attributes: Dict[str, List[str]]) -> Dict:
215
  """
216
  Generate the next best question to narrow down results (Akinator-style).
217
+
218
  Args:
219
  query: Original user query
220
  candidate_attributes: Dict mapping attribute_name → list of unique values
221
  e.g. {"location": ["indoor", "outdoor"], "time_of_day": ["day", "night"]}
222
+
223
  Returns:
224
+ {"attribute": "location", "question": "Is the scene indoor or outdoor?",
225
  "options": ["indoor", "outdoor"]}
226
  """
227
  attrs_str = json.dumps(candidate_attributes, indent=2)