paresh2806, paresh0628, and Kevin Hu committed
Commit f6c384e · 1 Parent(s): 40bbe34

added SVG for Groq model providers (#1470)


#1432 #1447
This PR adds support for Groq as an LLM provider.

Groq is an AI solutions company delivering ultra-low-latency inference with the first-ever LPU™ Inference Engine. The Groq API enables developers to integrate state-of-the-art LLMs, such as Llama 3 and Mixtral, into low-latency applications, subject to the request limits listed below. Learn more at [groq.com](https://groq.com/).
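
For context, the integration builds on the official `groq` Python SDK, which exposes an OpenAI-style chat interface. A minimal standalone sketch of the call the new wrapper is built around (the API key and prompt are placeholders, not values from this PR):

```python
from groq import Groq

# Placeholder key; RAGFlow supplies the real key from the provider settings.
client = Groq(api_key="YOUR_GROQ_API_KEY")

response = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[{"role": "user", "content": "Explain LPU inference in one sentence."}],
    temperature=0.7,
)
print(response.choices[0].message.content)
print(response.usage.total_tokens)  # same token accounting the wrapper returns
```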
Supported Models

| ID                 | Requests per Minute | Requests per Day | Tokens per Minute |
|--------------------|---------------------|------------------|-------------------|
| gemma-7b-it        | 30                  | 14,400           | 15,000            |
| gemma2-9b-it       | 30                  | 14,400           | 15,000            |
| llama3-70b-8192    | 30                  | 14,400           | 6,000             |
| llama3-8b-8192     | 30                  | 14,400           | 30,000            |
| mixtral-8x7b-32768 | 30                  | 14,400           | 5,000             |
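
These per-minute limits are tight for production traffic, so callers may want to back off and retry on HTTP 429. A minimal sketch, assuming the SDK's OpenAI-style `RateLimitError`; the retry count and sleep times are illustrative, not part of this PR:

```python
import time

from groq import Groq, RateLimitError

client = Groq(api_key="YOUR_GROQ_API_KEY")  # placeholder key

def chat_with_retry(messages, model="llama3-8b-8192", max_retries=3):
    """Call Groq chat completions, backing off when a 429 is returned."""
    for attempt in range(max_retries):
        try:
            resp = client.chat.completions.create(model=model, messages=messages)
            return resp.choices[0].message.content
        except RateLimitError:
            time.sleep(2 ** attempt)  # waits 1s, 2s, 4s (illustrative backoff)
    raise RuntimeError(f"still rate-limited after {max_retries} retries")
```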

---------

Co-authored-by: paresh0628 <paresh.tuvoc@gmail.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>

api/db/init_data.py CHANGED

```diff
@@ -180,6 +180,12 @@ factory_infos = [{
     "logo": "",
     "tags": "LLM,TEXT EMBEDDING,IMAGE2TEXT",
     "status": "1",
+},
+{
+    "name": "Groq",
+    "logo": "",
+    "tags": "LLM",
+    "status": "1",
 }
 # {
 #     "name": "文心一言",
@@ -933,6 +939,47 @@ def init_llm_factory():
     "tags": "TEXT EMBEDDING",
     "max_tokens": 2048,
     "model_type": LLMType.EMBEDDING.value
+},
+# ------------------------ Groq -----------------------
+{
+    "fid": factory_infos[18]["name"],
+    "llm_name": "gemma-7b-it",
+    "tags": "LLM,CHAT,15k",
+    "max_tokens": 8192,
+
+    "model_type": LLMType.CHAT.value
+},
+{
+    "fid": factory_infos[18]["name"],
+    "llm_name": "gemma2-9b-it",
+    "tags": "LLM,CHAT,15k",
+    "max_tokens": 8192,
+
+    "model_type": LLMType.CHAT.value
+},
+{
+    "fid": factory_infos[18]["name"],
+    "llm_name": "llama3-70b-8192",
+    "tags": "LLM,CHAT,6k",
+    "max_tokens": 8192,
+
+    "model_type": LLMType.CHAT.value
+},
+{
+    "fid": factory_infos[18]["name"],
+    "llm_name": "llama3-8b-8192",
+    "tags": "LLM,CHAT,30k",
+    "max_tokens": 8192,
+
+    "model_type": LLMType.CHAT.value
+},
+{
+    "fid": factory_infos[18]["name"],
+    "llm_name": "mixtral-8x7b-32768",
+    "tags": "LLM,CHAT,5k",
+    "max_tokens": 32768,
+
+    "model_type": LLMType.CHAT.value
 }
 ]
 for info in factory_infos:
```
rag/llm/__init__.py CHANGED

```diff
@@ -32,7 +32,8 @@ EmbeddingModel = {
     "Jina": JinaEmbed,
     "BAAI": DefaultEmbedding,
     "Mistral": MistralEmbed,
-    "Bedrock": BedrockEmbed
+    "Bedrock": BedrockEmbed,
+    "Groq": GroqChat
 }
 
 
```
rag/llm/chat_model.py CHANGED

```diff
@@ -23,6 +23,7 @@ from ollama import Client
 from volcengine.maas.v2 import MaasService
 from rag.nlp import is_english
 from rag.utils import num_tokens_from_string
+from groq import Groq
 
 
 class Base(ABC):
@@ -681,4 +682,63 @@ class GeminiChat(Base):
         except Exception as e:
             yield ans + "\n**ERROR**: " + str(e)
 
-        yield response._chunks[-1].usage_metadata.total_token_count
+        yield response._chunks[-1].usage_metadata.total_token_count
+
+
+
+class GroqChat:
+    def __init__(self, key, model_name, base_url=''):
+        self.client = Groq(api_key=key)
+        self.model_name = model_name
+
+    def chat(self, system, history, gen_conf):
+        if system:
+            history.insert(0, {"role": "system", "content": system})
+        for k in list(gen_conf.keys()):
+            if k not in ["temperature", "top_p", "max_tokens"]:
+                del gen_conf[k]
+
+        ans = ""
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=history,
+                **gen_conf
+            )
+            ans = response.choices[0].message.content
+            if response.choices[0].finish_reason == "length":
+                ans += "...\nFor the content length reason, it stopped, continue?" if self.is_english(
+                    [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
+            return ans, response.usage.total_tokens
+        except Exception as e:
+            return ans + "\n**ERROR**: " + str(e), 0
+
+    def chat_streamly(self, system, history, gen_conf):
+        if system:
+            history.insert(0, {"role": "system", "content": system})
+        for k in list(gen_conf.keys()):
+            if k not in ["temperature", "top_p", "max_tokens"]:
+                del gen_conf[k]
+        ans = ""
+        total_tokens = 0
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=history,
+                stream=True,
+                **gen_conf
+            )
+            for resp in response:
+                if not resp.choices or not resp.choices[0].delta.content:
+                    continue
+                ans += resp.choices[0].delta.content
+                total_tokens += 1
+                if resp.choices[0].finish_reason == "length":
+                    ans += "...\nFor the content length reason, it stopped, continue?" if self.is_english(
+                        [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
+                yield ans
+
+        except Exception as e:
+            yield ans + "\n**ERROR**: " + str(e)
+
+        yield total_tokens
```
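
For reference, a hypothetical direct invocation of the new `GroqChat` wrapper; in RAGFlow the class is normally resolved through the registry in rag/llm/__init__.py, and the key, model, and messages below are placeholders:

```python
from rag.llm.chat_model import GroqChat

model = GroqChat(key="YOUR_GROQ_API_KEY", model_name="llama3-70b-8192")

# Non-streaming: returns (answer, total_tokens).
answer, tokens = model.chat(
    system="You are a concise assistant.",
    history=[{"role": "user", "content": "Summarize this PR in one line."}],
    gen_conf={"temperature": 0.3, "max_tokens": 256},
)
print(answer, tokens)

# Streaming: yields the accumulating answer, then the token count last.
for chunk in model.chat_streamly(
    system="You are a concise assistant.",
    history=[{"role": "user", "content": "And in two lines?"}],
    gen_conf={"temperature": 0.3},
):
    print(chunk)
```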
requirements.txt CHANGED

```diff
@@ -147,4 +147,5 @@ markdown==3.6
 mistralai==0.4.2
 boto3==1.34.140
 duckduckgo_search==6.1.9
-google-generativeai==0.7.2
+google-generativeai==0.7.2
+groq==0.9.0
```
requirements_arm.txt CHANGED

```diff
@@ -148,4 +148,5 @@ markdown==3.6
 mistralai==0.4.2
 boto3==1.34.140
 duckduckgo_search==6.1.9
-google-generativeai==0.7.2
+google-generativeai==0.7.2
+groq==0.9.0
```
requirements_dev.txt CHANGED

```diff
@@ -133,4 +133,5 @@ markdown==3.6
 mistralai==0.4.2
 boto3==1.34.140
 duckduckgo_search==6.1.9
-google-generativeai==0.7.2
+google-generativeai==0.7.2
+groq==0.9.0
```
web/src/assets/svg/llm/Groq.svg ADDED
web/src/pages/user-setting/setting-model/index.tsx CHANGED

```diff
@@ -62,6 +62,7 @@ const IconMap = {
   'Azure-OpenAI': 'azure',
   Bedrock: 'bedrock',
   Gemini: 'gemini',
+  Groq: 'Groq',
 };
 
 const LlmIcon = ({ name }: { name: string }) => {
```