Update api.py
David Ko committed · Commit 675ab86 · 1 Parent(s): 0692db7
api.py CHANGED
@@ -1724,16 +1724,51 @@ def vision_rag_query():
     except Exception as _e:
         print("[VRAG][WARN] failed to log context:", _e)
 
+    # Attempt multimodal call (text + top-1 image) if available; otherwise fallback to text-only LangChain.
+    answer = None
+    model_used = None
     try:
         start = time.time()
-
-
-
-
-
-
-
-
+        top_data_url = None
+        try:
+            if formatted:
+                md0 = (formatted[0] or {}).get('metadata') or {}
+                img_b64 = md0.get('image_data')
+                if isinstance(img_b64, str) and len(img_b64) > 50:
+                    # Construct data URL without logging raw base64
+                    top_data_url = 'data:image/jpeg;base64,' + img_b64
+        except Exception:
+            top_data_url = None
+
+        # Prefer OpenAI SDK for multimodal if available and we have an image
+        if OpenAI is not None and top_data_url is not None:
+            client = OpenAI(api_key=api_key)
+            model_used = os.environ.get('OPENAI_MODEL', 'gpt-4o')
+            chat = client.chat.completions.create(
+                model=model_used,
+                messages=[
+                    {"role": "system", "content": system_text},
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": user_text},
+                            {"type": "image_url", "image_url": {"url": top_data_url}},
+                        ],
+                    },
+                ],
+            )
+            answer = chat.choices[0].message.content if chat and chat.choices else ''
+        else:
+            # Fallback to existing LangChain text-only flow
+            llm = ChatOpenAI(api_key=api_key, model=os.environ.get('OPENAI_MODEL', 'gpt-4o'))
+            prompt = ChatPromptTemplate.from_messages([
+                ("system", system_text),
+                ("human", "{input}")
+            ])
+            chain = prompt | llm | StrOutputParser()
+            answer = chain.invoke({"input": user_text})
+            model_used = getattr(llm, 'model', None)
+
         latency = round(time.time() - start, 3)
     except Exception as e:
         return jsonify({"error": f"LLM call failed: {str(e)}"}), 502

@@ -1741,7 +1776,7 @@ def vision_rag_query():
     return jsonify({
         "answer": answer,
         "retrieved": context_items,
-        "model":
+        "model": model_used,
         "latency_sec": latency
     })
 
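For reference, the multimodal branch above follows the standard OpenAI chat-completions pattern of sending an image as a base64 data URL alongside the text prompt. A minimal standalone sketch of that pattern, assuming the openai>=1.x Python SDK and OPENAI_API_KEY in the environment; the file name, system text, and question below are illustrative placeholders, not values from the commit:

# Sketch of the multimodal request pattern used in the new branch (assumes openai>=1.x).
# "photo.jpg" and the prompt strings are hypothetical, not from the commit.
import base64
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Encode a local JPEG the same way the endpoint consumes the top-1 retrieved image.
with open("photo.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("ascii")

chat = client.chat.completions.create(
    model=os.environ.get("OPENAI_MODEL", "gpt-4o"),
    messages=[
        {"role": "system", "content": "Answer using the retrieved context."},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is shown in this image?"},
                # Same data-URL form the endpoint builds; raw base64 is never logged.
                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + img_b64}},
            ],
        },
    ],
)
print(chat.choices[0].message.content)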
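The fallback branch is plain LangChain Expression Language: a chat prompt piped into the model and a string output parser. A minimal sketch of the same chain, assuming langchain-openai and langchain-core are installed; the system text and question are placeholders:

# Sketch of the text-only fallback chain (assumes langchain-openai / langchain-core).
import os

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(api_key=os.environ["OPENAI_API_KEY"],
                 model=os.environ.get("OPENAI_MODEL", "gpt-4o"))
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer using the retrieved context."),
    ("human", "{input}"),
])
chain = prompt | llm | StrOutputParser()  # LCEL: prompt -> model -> plain string
print(chain.invoke({"input": "Summarize the retrieved documents."}))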