bdss58 lijianyong committed on
Commit
479edea
·
1 Parent(s): cbc2f9a

add step-1v-8k cv model (#1686)

Browse files

### What problem does this PR solve?

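Adds computer-vision (image-to-text) support for StepFun's `step-1v-8k` model:
a new `StepFunCV` class in `rag/llm/cv_model.py`, its registration under
`"StepFun"` in the `CvModel` map in `rag/llm/__init__.py`, and a `max_tokens`
correction from 8000 to 8192 for `step-1v-8k` in `conf/llm_factories.json`.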

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: lijianyong <lijianyong@stepfun.com>

conf/llm_factories.json CHANGED
@@ -1920,7 +1920,7 @@
1920
  {
1921
  "llm_name": "step-1v-8k",
1922
  "tags": "LLM,CHAT,IMAGE2TEXT",
1923
- "max_tokens": 8000,
1924
  "model_type": "image2text"
1925
  }
1926
  ]
 
1920
  {
1921
  "llm_name": "step-1v-8k",
1922
  "tags": "LLM,CHAT,IMAGE2TEXT",
1923
+ "max_tokens": 8192,
1924
  "model_type": "image2text"
1925
  }
1926
  ]
rag/llm/__init__.py CHANGED
@@ -52,7 +52,8 @@ CvModel = {
52
  "OpenRouter": OpenRouterCV,
53
  "LocalAI": LocalAICV,
54
  "NVIDIA": NvidiaCV,
55
- "LM-Studio": LmStudioCV
 
56
  }
57
 
58
 
 
52
  "OpenRouter": OpenRouterCV,
53
  "LocalAI": LocalAICV,
54
  "NVIDIA": NvidiaCV,
55
+ "LM-Studio": LmStudioCV,
56
+ "StepFun":StepFunCV
57
  }
58
 
59
 
rag/llm/cv_model.py CHANGED
@@ -622,6 +622,26 @@ class NvidiaCV(Base):
622
  }
623
  ]
624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
 
626
  class LmStudioCV(GptV4):
627
  def __init__(self, key, model_name, base_url, lang="Chinese"):
 
622
  }
623
  ]
624
 
625
+ class StepFunCV(Base):
626
+ def __init__(self, key, model_name="step-1v-8k", lang="Chinese", base_url="https://api.stepfun.com/v1"):
627
+ if not base_url: base_url="https://api.stepfun.com/v1"
628
+ self.client = OpenAI(api_key=key, base_url=base_url)
629
+ self.model_name = model_name
630
+ self.lang = lang
631
+
632
+ def describe(self, image, max_tokens=4096):
633
+ b64 = self.image2base64(image)
634
+ prompt = self.prompt(b64)
635
+ for i in range(len(prompt)):
636
+ for c in prompt[i]["content"]:
637
+ if "text" in c: c["type"] = "text"
638
+
639
+ res = self.client.chat.completions.create(
640
+ model=self.model_name,
641
+ messages=prompt,
642
+ max_tokens=max_tokens,
643
+ )
644
+ return res.choices[0].message.content.strip(), res.usage.total_tokens
645
 
646
  class LmStudioCV(GptV4):
647
  def __init__(self, key, model_name, base_url, lang="Chinese"):