David Ko committed on
Commit 675ab86 · 1 Parent(s): 0692db7

Update api.py

Files changed (1)
  1. api.py +44 -9
api.py CHANGED
@@ -1724,16 +1724,51 @@ def vision_rag_query():
     except Exception as _e:
         print("[VRAG][WARN] failed to log context:", _e)
 
+    # Attempt multimodal call (text + top-1 image) if available; otherwise fallback to text-only LangChain.
+    answer = None
+    model_used = None
     try:
         start = time.time()
-        llm = ChatOpenAI(api_key=api_key, model=os.environ.get('OPENAI_MODEL', 'gpt-4o'))
-        # Keep it simple: template -> LLM -> string
-        prompt = ChatPromptTemplate.from_messages([
-            ("system", system_text),
-            ("human", "{input}")
-        ])
-        chain = prompt | llm | StrOutputParser()
-        answer = chain.invoke({"input": user_text})
+        top_data_url = None
+        try:
+            if formatted:
+                md0 = (formatted[0] or {}).get('metadata') or {}
+                img_b64 = md0.get('image_data')
+                if isinstance(img_b64, str) and len(img_b64) > 50:
+                    # Construct data URL without logging raw base64
+                    top_data_url = 'data:image/jpeg;base64,' + img_b64
+        except Exception:
+            top_data_url = None
+
+        # Prefer OpenAI SDK for multimodal if available and we have an image
+        if OpenAI is not None and top_data_url is not None:
+            client = OpenAI(api_key=api_key)
+            model_used = os.environ.get('OPENAI_MODEL', 'gpt-4o')
+            chat = client.chat.completions.create(
+                model=model_used,
+                messages=[
+                    {"role": "system", "content": system_text},
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": user_text},
+                            {"type": "image_url", "image_url": {"url": top_data_url}},
+                        ],
+                    },
+                ],
+            )
+            answer = chat.choices[0].message.content if chat and chat.choices else ''
+        else:
+            # Fallback to existing LangChain text-only flow
+            llm = ChatOpenAI(api_key=api_key, model=os.environ.get('OPENAI_MODEL', 'gpt-4o'))
+            prompt = ChatPromptTemplate.from_messages([
+                ("system", system_text),
+                ("human", "{input}")
+            ])
+            chain = prompt | llm | StrOutputParser()
+            answer = chain.invoke({"input": user_text})
+            model_used = getattr(llm, 'model', None)
+
         latency = round(time.time() - start, 3)
     except Exception as e:
         return jsonify({"error": f"LLM call failed: {str(e)}"}), 502
@@ -1741,7 +1776,7 @@ def vision_rag_query():
     return jsonify({
         "answer": answer,
         "retrieved": context_items,
-        "model": getattr(llm, 'model', None),
+        "model": model_used,
         "latency_sec": latency
     })
 
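
For reference, the multimodal message shape this commit adopts can be exercised outside the Flask endpoint. The sketch below is not part of the commit: build_multimodal_messages, page.jpg, and the sample formatted list are hypothetical stand-ins for the retrieval results the endpoint already has in scope; only the openai SDK call (chat.completions.create with a text part plus an image_url data URL) mirrors what the diff above does.

import base64
import os

from openai import OpenAI


def build_multimodal_messages(system_text, user_text, formatted):
    """Build chat messages; attach the top-1 retrieved image as a data URL when present."""
    content = [{"type": "text", "text": user_text}]
    md0 = ((formatted[0] or {}).get("metadata") or {}) if formatted else {}
    img_b64 = md0.get("image_data")
    if isinstance(img_b64, str) and len(img_b64) > 50:
        content.append({
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64," + img_b64},
        })
    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": content},
    ]


if __name__ == "__main__":
    # Hypothetical retrieval result shaped like the endpoint's `formatted` list:
    # the top-1 item carries a base64-encoded JPEG under metadata.image_data.
    with open("page.jpg", "rb") as f:
        formatted = [{"metadata": {"image_data": base64.b64encode(f.read()).decode()}}]

    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    chat = client.chat.completions.create(
        model=os.environ.get("OPENAI_MODEL", "gpt-4o"),
        messages=build_multimodal_messages(
            "Answer using the retrieved page image.",
            "What does this page show?",
            formatted,
        ),
    )
    print(chat.choices[0].message.content)

As in the endpoint, the image is attached only when a plausibly valid base64 string is present, so the same code degrades to a text-only request otherwise.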