Spaces: HGKo/vision-web-app

Runtime error

App Files Files Community

David Ko committed on Aug 12

Commit

e821ec3

1 Parent(s): d10dea2

Fix: LLM query token length limit issue

Browse files

Files changed (1) hide show

api.py +27 -1

api.py CHANGED Viewed

@@ -98,9 +98,24 @@ def process_llm_query(vision_results, user_query):
     if llm_model is None or llm_tokenizer is None:
         return {"error": "LLM model not available"}
     # Create a prompt combining vision results and user query
     prompt = f"""You are an AI assistant analyzing image detection results.
-    Here are the objects detected in the image: {json.dumps(vision_results, indent=2)}
     User question: {user_query}
@@ -111,6 +126,17 @@ def process_llm_query(vision_results, user_query):
     try:
         start_time = time.time()
         inputs = llm_tokenizer(prompt, return_tensors="pt").to(device)
         with torch.no_grad():
             output = llm_model.generate(

     if llm_model is None or llm_tokenizer is None:
         return {"error": "LLM model not available"}
+    # 결과 데이터 요약 (토큰 길이 제한을 위해)
+    summarized_results = []
+    # 객체 탐지 결과 요약
+    if isinstance(vision_results, list):
+        # 최대 10개 객체만 포함
+        for i, obj in enumerate(vision_results[:10]):
+            if isinstance(obj, dict):
+                # 필요한 정보만 추출
+                summary = {
+                    "label": obj.get("label", "unknown"),
+                    "confidence": obj.get("confidence", 0),
+                }
+                summarized_results.append(summary)
     # Create a prompt combining vision results and user query
     prompt = f"""You are an AI assistant analyzing image detection results.
+    Here are the objects detected in the image: {json.dumps(summarized_results, indent=2)}
     User question: {user_query}
     try:
         start_time = time.time()
+        # 토큰 길이 확인 및 제한
+        tokens = llm_tokenizer.encode(prompt)
+        if len(tokens) > 1500:  # 안전 마진 설정
+            prompt = f"""You are an AI assistant analyzing image detection results.
+            The image contains {len(summarized_results)} detected objects.
+            User question: {user_query}
+            Please provide a general analysis based on the user's question.
+            """
         inputs = llm_tokenizer(prompt, return_tensors="pt").to(device)
         with torch.no_grad():
             output = llm_model.generate(