add step-1v-8k cv model (#1686)
### What problem does this PR solve?
Adds support for StepFun's `step-1v-8k` vision model: registers the model in
`conf/llm_factories.json` (with `max_tokens` set to 8192), wires `StepFun`
into the `CvModel` factory map in `rag/llm/__init__.py`, and implements a new
`StepFunCV` image-to-text client in `rag/llm/cv_model.py`.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
---------
Co-authored-by: lijianyong <lijianyong@stepfun.com>
- conf/llm_factories.json +1 -1
- rag/llm/__init__.py +2 -1
- rag/llm/cv_model.py +20 -0
conf/llm_factories.json
CHANGED
@@ -1920,7 +1920,7 @@
|
|
1920 |
{
|
1921 |
"llm_name": "step-1v-8k",
|
1922 |
"tags": "LLM,CHAT,IMAGE2TEXT",
|
1923 |
-
"max_tokens":
|
1924 |
"model_type": "image2text"
|
1925 |
}
|
1926 |
]
|
|
|
1920 |
{
|
1921 |
"llm_name": "step-1v-8k",
|
1922 |
"tags": "LLM,CHAT,IMAGE2TEXT",
|
1923 |
+
"max_tokens": 8192,
|
1924 |
"model_type": "image2text"
|
1925 |
}
|
1926 |
]
|
rag/llm/__init__.py
CHANGED
@@ -52,7 +52,8 @@ CvModel = {
|
|
52 |
"OpenRouter": OpenRouterCV,
|
53 |
"LocalAI": LocalAICV,
|
54 |
"NVIDIA": NvidiaCV,
|
55 |
-
"LM-Studio": LmStudioCV
|
|
|
56 |
}
|
57 |
|
58 |
|
|
|
52 |
"OpenRouter": OpenRouterCV,
|
53 |
"LocalAI": LocalAICV,
|
54 |
"NVIDIA": NvidiaCV,
|
55 |
+
"LM-Studio": LmStudioCV,
|
56 |
+
"StepFun": StepFunCV
|
57 |
}
|
58 |
|
59 |
|
rag/llm/cv_model.py
CHANGED
@@ -622,6 +622,26 @@ class NvidiaCV(Base):
|
|
622 |
}
|
623 |
]
|
624 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
625 |
|
626 |
class LmStudioCV(GptV4):
|
627 |
def __init__(self, key, model_name, base_url, lang="Chinese"):
|
|
|
622 |
}
|
623 |
]
|
624 |
|
625 |
+
class StepFunCV(Base):
|
626 |
+
def __init__(self, key, model_name="step-1v-8k", lang="Chinese", base_url="https://api.stepfun.com/v1"):
|
627 |
+
if not base_url: base_url="https://api.stepfun.com/v1"
|
628 |
+
self.client = OpenAI(api_key=key, base_url=base_url)
|
629 |
+
self.model_name = model_name
|
630 |
+
self.lang = lang
|
631 |
+
|
632 |
+
def describe(self, image, max_tokens=4096):
|
633 |
+
b64 = self.image2base64(image)
|
634 |
+
prompt = self.prompt(b64)
|
635 |
+
for i in range(len(prompt)):
|
636 |
+
for c in prompt[i]["content"]:
|
637 |
+
if "text" in c: c["type"] = "text"
|
638 |
+
|
639 |
+
res = self.client.chat.completions.create(
|
640 |
+
model=self.model_name,
|
641 |
+
messages=prompt,
|
642 |
+
max_tokens=max_tokens,
|
643 |
+
)
|
644 |
+
return res.choices[0].message.content.strip(), res.usage.total_tokens
|
645 |
|
646 |
class LmStudioCV(GptV4):
|
647 |
def __init__(self, key, model_name, base_url, lang="Chinese"):
|