nsarrazin HF staff committed on
Commit
a99cca3
1 Parent(s): 0819256

Add support for cohere endpoints (#976)

Browse files

* Add support for cohere endpoints

* readme update

.env CHANGED
@@ -13,6 +13,7 @@ OPENAI_API_KEY=#your openai api key here
13
  ANTHROPIC_API_KEY=#your anthropic api key here
14
  CLOUDFLARE_ACCOUNT_ID=#your cloudflare account id here
15
  CLOUDFLARE_API_TOKEN=#your cloudflare api token here
 
16
 
17
  HF_ACCESS_TOKEN=#LEGACY! Use HF_TOKEN instead
18
 
 
13
  ANTHROPIC_API_KEY=#your anthropic api key here
14
  CLOUDFLARE_ACCOUNT_ID=#your cloudflare account id here
15
  CLOUDFLARE_API_TOKEN=#your cloudflare api token here
16
+ COHERE_API_TOKEN=#your cohere api token here
17
 
18
  HF_ACCESS_TOKEN=#LEGACY! Use HF_TOKEN instead
19
 
README.md CHANGED
@@ -560,6 +560,28 @@ You can find the list of models available on Cloudflare [here](https://developer
560
  > [!NOTE]
561
  > Cloudflare Workers AI currently does not support custom sampling parameters like temperature, top_p, etc.
562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  ##### Google Vertex models
564
 
565
  Chat UI can connect to the google Vertex API endpoints ([List of supported models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models)).
 
560
  > [!NOTE]
561
  > Cloudflare Workers AI currently does not support custom sampling parameters like temperature, top_p, etc.
562
 
563
+ #### Cohere
564
+
565
+ You can also use Cohere to run their models directly from chat-ui. You will need to have a Cohere account, then get your [API token](https://dashboard.cohere.com/api-keys). You can either specify it directly in your `.env.local` using the `COHERE_API_TOKEN` variable, or you can set it in the endpoint config.
566
+
567
+ Here is an example of a Cohere model config. You can set which model you want to use by setting the `id` field to the model name.
568
+
569
+ ```env
570
+ {
571
+ "name" : "CohereForAI/c4ai-command-r-v01",
572
+ "id": "command-r",
573
+ "description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model",
574
+ "endpoints": [
575
+ {
576
+ "type": "cohere",
577
+ <!-- optionally specify these, or use COHERE_API_TOKEN
578
+ "apiKey": "your-api-token"
579
+ -->
580
+ }
581
+ ]
582
+ }
583
+ ```
584
+
585
  ##### Google Vertex models
586
 
587
  Chat UI can connect to the google Vertex API endpoints ([List of supported models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models)).
package-lock.json CHANGED
@@ -75,6 +75,7 @@
75
  "@anthropic-ai/sdk": "^0.17.1",
76
  "@google-cloud/vertexai": "^0.5.0",
77
  "aws4fetch": "^1.0.17",
 
78
  "openai": "^4.14.2"
79
  }
80
  },
@@ -2881,6 +2882,25 @@
2881
  "node": ">=8"
2882
  }
2883
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2884
  "node_modules/callsites": {
2885
  "version": "3.1.0",
2886
  "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@@ -3041,6 +3061,19 @@
3041
  "@types/estree": "^1.0.0"
3042
  }
3043
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
3044
  "node_modules/color": {
3045
  "version": "4.2.3",
3046
  "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
@@ -3358,6 +3391,23 @@
3358
  "node": ">=0.10.0"
3359
  }
3360
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3361
  "node_modules/delayed-stream": {
3362
  "version": "1.0.0",
3363
  "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
@@ -3508,6 +3558,27 @@
3508
  "url": "https://github.com/fb55/entities?sponsor=1"
3509
  }
3510
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3511
  "node_modules/es6-promise": {
3512
  "version": "3.3.1",
3513
  "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-3.3.1.tgz",
@@ -4107,6 +4178,25 @@
4107
  "node": "*"
4108
  }
4109
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4110
  "node_modules/get-stream": {
4111
  "version": "6.0.1",
4112
  "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
@@ -4219,6 +4309,18 @@
4219
  "node": ">=14"
4220
  }
4221
  },
 
 
 
 
 
 
 
 
 
 
 
 
4222
  "node_modules/graceful-fs": {
4223
  "version": "4.2.11",
4224
  "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
@@ -4284,6 +4386,42 @@
4284
  "node": ">=8"
4285
  }
4286
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4287
  "node_modules/hash-wasm": {
4288
  "version": "4.9.0",
4289
  "resolved": "https://registry.npmjs.org/hash-wasm/-/hash-wasm-4.9.0.tgz",
@@ -4625,6 +4763,12 @@
4625
  "url": "https://github.com/sponsors/panva"
4626
  }
4627
  },
 
 
 
 
 
 
4628
  "node_modules/js-sdsl": {
4629
  "version": "4.3.0",
4630
  "resolved": "https://registry.npmjs.org/js-sdsl/-/js-sdsl-4.3.0.tgz",
@@ -5426,6 +5570,15 @@
5426
  "node": ">= 6"
5427
  }
5428
  },
 
 
 
 
 
 
 
 
 
5429
  "node_modules/object-stream": {
5430
  "version": "0.0.1",
5431
  "resolved": "https://registry.npmjs.org/object-stream/-/object-stream-0.0.1.tgz",
@@ -6225,6 +6378,21 @@
6225
  "teleport": ">=0.2.0"
6226
  }
6227
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6228
  "node_modules/querystringify": {
6229
  "version": "2.2.0",
6230
  "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
@@ -6557,6 +6725,23 @@
6557
  "integrity": "sha512-RVnVQxTXuerk653XfuliOxBP81Sf0+qfQE73LIYKcyMYHG94AuH0kgrQpRDuTZnSmjpysHmzxJXKNfa6PjFhyQ==",
6558
  "dev": true
6559
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6560
  "node_modules/sharp": {
6561
  "version": "0.33.2",
6562
  "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.2.tgz",
@@ -6617,6 +6802,24 @@
6617
  "node": ">=8"
6618
  }
6619
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6620
  "node_modules/siginfo": {
6621
  "version": "2.0.0",
6622
  "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz",
@@ -7647,6 +7850,12 @@
7647
  "punycode": "^2.1.0"
7648
  }
7649
  },
 
 
 
 
 
 
7650
  "node_modules/url-parse": {
7651
  "version": "1.5.10",
7652
  "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
 
75
  "@anthropic-ai/sdk": "^0.17.1",
76
  "@google-cloud/vertexai": "^0.5.0",
77
  "aws4fetch": "^1.0.17",
78
+ "cohere-ai": "^7.9.0",
79
  "openai": "^4.14.2"
80
  }
81
  },
 
2882
  "node": ">=8"
2883
  }
2884
  },
2885
+ "node_modules/call-bind": {
2886
+ "version": "1.0.7",
2887
+ "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz",
2888
+ "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==",
2889
+ "optional": true,
2890
+ "dependencies": {
2891
+ "es-define-property": "^1.0.0",
2892
+ "es-errors": "^1.3.0",
2893
+ "function-bind": "^1.1.2",
2894
+ "get-intrinsic": "^1.2.4",
2895
+ "set-function-length": "^1.2.1"
2896
+ },
2897
+ "engines": {
2898
+ "node": ">= 0.4"
2899
+ },
2900
+ "funding": {
2901
+ "url": "https://github.com/sponsors/ljharb"
2902
+ }
2903
+ },
2904
  "node_modules/callsites": {
2905
  "version": "3.1.0",
2906
  "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
 
3061
  "@types/estree": "^1.0.0"
3062
  }
3063
  },
3064
+ "node_modules/cohere-ai": {
3065
+ "version": "7.9.0",
3066
+ "resolved": "https://registry.npmjs.org/cohere-ai/-/cohere-ai-7.9.0.tgz",
3067
+ "integrity": "sha512-iHPG4dule+nMlw88Xe0USGZbLlXuRC4yvOvfCqoEdW9tHOc0vkiPfiyjBalDcPKj9KEvWZfii84kyN5HyTMySw==",
3068
+ "optional": true,
3069
+ "dependencies": {
3070
+ "form-data": "4.0.0",
3071
+ "js-base64": "3.7.2",
3072
+ "node-fetch": "2.7.0",
3073
+ "qs": "6.11.2",
3074
+ "url-join": "4.0.1"
3075
+ }
3076
+ },
3077
  "node_modules/color": {
3078
  "version": "4.2.3",
3079
  "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
 
3391
  "node": ">=0.10.0"
3392
  }
3393
  },
3394
+ "node_modules/define-data-property": {
3395
+ "version": "1.1.4",
3396
+ "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
3397
+ "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==",
3398
+ "optional": true,
3399
+ "dependencies": {
3400
+ "es-define-property": "^1.0.0",
3401
+ "es-errors": "^1.3.0",
3402
+ "gopd": "^1.0.1"
3403
+ },
3404
+ "engines": {
3405
+ "node": ">= 0.4"
3406
+ },
3407
+ "funding": {
3408
+ "url": "https://github.com/sponsors/ljharb"
3409
+ }
3410
+ },
3411
  "node_modules/delayed-stream": {
3412
  "version": "1.0.0",
3413
  "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
 
3558
  "url": "https://github.com/fb55/entities?sponsor=1"
3559
  }
3560
  },
3561
+ "node_modules/es-define-property": {
3562
+ "version": "1.0.0",
3563
+ "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz",
3564
+ "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==",
3565
+ "optional": true,
3566
+ "dependencies": {
3567
+ "get-intrinsic": "^1.2.4"
3568
+ },
3569
+ "engines": {
3570
+ "node": ">= 0.4"
3571
+ }
3572
+ },
3573
+ "node_modules/es-errors": {
3574
+ "version": "1.3.0",
3575
+ "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
3576
+ "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
3577
+ "optional": true,
3578
+ "engines": {
3579
+ "node": ">= 0.4"
3580
+ }
3581
+ },
3582
  "node_modules/es6-promise": {
3583
  "version": "3.3.1",
3584
  "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-3.3.1.tgz",
 
4178
  "node": "*"
4179
  }
4180
  },
4181
+ "node_modules/get-intrinsic": {
4182
+ "version": "1.2.4",
4183
+ "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz",
4184
+ "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==",
4185
+ "optional": true,
4186
+ "dependencies": {
4187
+ "es-errors": "^1.3.0",
4188
+ "function-bind": "^1.1.2",
4189
+ "has-proto": "^1.0.1",
4190
+ "has-symbols": "^1.0.3",
4191
+ "hasown": "^2.0.0"
4192
+ },
4193
+ "engines": {
4194
+ "node": ">= 0.4"
4195
+ },
4196
+ "funding": {
4197
+ "url": "https://github.com/sponsors/ljharb"
4198
+ }
4199
+ },
4200
  "node_modules/get-stream": {
4201
  "version": "6.0.1",
4202
  "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
 
4309
  "node": ">=14"
4310
  }
4311
  },
4312
+ "node_modules/gopd": {
4313
+ "version": "1.0.1",
4314
+ "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz",
4315
+ "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==",
4316
+ "optional": true,
4317
+ "dependencies": {
4318
+ "get-intrinsic": "^1.1.3"
4319
+ },
4320
+ "funding": {
4321
+ "url": "https://github.com/sponsors/ljharb"
4322
+ }
4323
+ },
4324
  "node_modules/graceful-fs": {
4325
  "version": "4.2.11",
4326
  "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
 
4386
  "node": ">=8"
4387
  }
4388
  },
4389
+ "node_modules/has-property-descriptors": {
4390
+ "version": "1.0.2",
4391
+ "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
4392
+ "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==",
4393
+ "optional": true,
4394
+ "dependencies": {
4395
+ "es-define-property": "^1.0.0"
4396
+ },
4397
+ "funding": {
4398
+ "url": "https://github.com/sponsors/ljharb"
4399
+ }
4400
+ },
4401
+ "node_modules/has-proto": {
4402
+ "version": "1.0.3",
4403
+ "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz",
4404
+ "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==",
4405
+ "optional": true,
4406
+ "engines": {
4407
+ "node": ">= 0.4"
4408
+ },
4409
+ "funding": {
4410
+ "url": "https://github.com/sponsors/ljharb"
4411
+ }
4412
+ },
4413
+ "node_modules/has-symbols": {
4414
+ "version": "1.0.3",
4415
+ "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz",
4416
+ "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==",
4417
+ "optional": true,
4418
+ "engines": {
4419
+ "node": ">= 0.4"
4420
+ },
4421
+ "funding": {
4422
+ "url": "https://github.com/sponsors/ljharb"
4423
+ }
4424
+ },
4425
  "node_modules/hash-wasm": {
4426
  "version": "4.9.0",
4427
  "resolved": "https://registry.npmjs.org/hash-wasm/-/hash-wasm-4.9.0.tgz",
 
4763
  "url": "https://github.com/sponsors/panva"
4764
  }
4765
  },
4766
+ "node_modules/js-base64": {
4767
+ "version": "3.7.2",
4768
+ "resolved": "https://registry.npmjs.org/js-base64/-/js-base64-3.7.2.tgz",
4769
+ "integrity": "sha512-NnRs6dsyqUXejqk/yv2aiXlAvOs56sLkX6nUdeaNezI5LFFLlsZjOThmwnrcwh5ZZRwZlCMnVAY3CvhIhoVEKQ==",
4770
+ "optional": true
4771
+ },
4772
  "node_modules/js-sdsl": {
4773
  "version": "4.3.0",
4774
  "resolved": "https://registry.npmjs.org/js-sdsl/-/js-sdsl-4.3.0.tgz",
 
5570
  "node": ">= 6"
5571
  }
5572
  },
5573
+ "node_modules/object-inspect": {
5574
+ "version": "1.13.1",
5575
+ "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz",
5576
+ "integrity": "sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==",
5577
+ "optional": true,
5578
+ "funding": {
5579
+ "url": "https://github.com/sponsors/ljharb"
5580
+ }
5581
+ },
5582
  "node_modules/object-stream": {
5583
  "version": "0.0.1",
5584
  "resolved": "https://registry.npmjs.org/object-stream/-/object-stream-0.0.1.tgz",
 
6378
  "teleport": ">=0.2.0"
6379
  }
6380
  },
6381
+ "node_modules/qs": {
6382
+ "version": "6.11.2",
6383
+ "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.2.tgz",
6384
+ "integrity": "sha512-tDNIz22aBzCDxLtVH++VnTfzxlfeK5CbqohpSqpJgj1Wg/cQbStNAz3NuqCs5vV+pjBsK4x4pN9HlVh7rcYRiA==",
6385
+ "optional": true,
6386
+ "dependencies": {
6387
+ "side-channel": "^1.0.4"
6388
+ },
6389
+ "engines": {
6390
+ "node": ">=0.6"
6391
+ },
6392
+ "funding": {
6393
+ "url": "https://github.com/sponsors/ljharb"
6394
+ }
6395
+ },
6396
  "node_modules/querystringify": {
6397
  "version": "2.2.0",
6398
  "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
 
6725
  "integrity": "sha512-RVnVQxTXuerk653XfuliOxBP81Sf0+qfQE73LIYKcyMYHG94AuH0kgrQpRDuTZnSmjpysHmzxJXKNfa6PjFhyQ==",
6726
  "dev": true
6727
  },
6728
+ "node_modules/set-function-length": {
6729
+ "version": "1.2.2",
6730
+ "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
6731
+ "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==",
6732
+ "optional": true,
6733
+ "dependencies": {
6734
+ "define-data-property": "^1.1.4",
6735
+ "es-errors": "^1.3.0",
6736
+ "function-bind": "^1.1.2",
6737
+ "get-intrinsic": "^1.2.4",
6738
+ "gopd": "^1.0.1",
6739
+ "has-property-descriptors": "^1.0.2"
6740
+ },
6741
+ "engines": {
6742
+ "node": ">= 0.4"
6743
+ }
6744
+ },
6745
  "node_modules/sharp": {
6746
  "version": "0.33.2",
6747
  "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.2.tgz",
 
6802
  "node": ">=8"
6803
  }
6804
  },
6805
+ "node_modules/side-channel": {
6806
+ "version": "1.0.6",
6807
+ "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz",
6808
+ "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==",
6809
+ "optional": true,
6810
+ "dependencies": {
6811
+ "call-bind": "^1.0.7",
6812
+ "es-errors": "^1.3.0",
6813
+ "get-intrinsic": "^1.2.4",
6814
+ "object-inspect": "^1.13.1"
6815
+ },
6816
+ "engines": {
6817
+ "node": ">= 0.4"
6818
+ },
6819
+ "funding": {
6820
+ "url": "https://github.com/sponsors/ljharb"
6821
+ }
6822
+ },
6823
  "node_modules/siginfo": {
6824
  "version": "2.0.0",
6825
  "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz",
 
7850
  "punycode": "^2.1.0"
7851
  }
7852
  },
7853
+ "node_modules/url-join": {
7854
+ "version": "4.0.1",
7855
+ "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz",
7856
+ "integrity": "sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==",
7857
+ "optional": true
7858
+ },
7859
  "node_modules/url-parse": {
7860
  "version": "1.5.10",
7861
  "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
package.json CHANGED
@@ -85,6 +85,7 @@
85
  "@anthropic-ai/sdk": "^0.17.1",
86
  "@google-cloud/vertexai": "^0.5.0",
87
  "aws4fetch": "^1.0.17",
 
88
  "openai": "^4.14.2"
89
  }
90
  }
 
85
  "@anthropic-ai/sdk": "^0.17.1",
86
  "@google-cloud/vertexai": "^0.5.0",
87
  "aws4fetch": "^1.0.17",
88
+ "cohere-ai": "^7.9.0",
89
  "openai": "^4.14.2"
90
  }
91
  }
src/lib/server/endpoints/cohere/endpointCohere.ts ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { z } from "zod";
2
+ import { COHERE_API_TOKEN } from "$env/static/private";
3
+ import type { Endpoint } from "../endpoints";
4
+ import type { TextGenerationStreamOutput } from "@huggingface/inference";
5
+ import type { Cohere, CohereClient } from "cohere-ai";
6
+ import { buildPrompt } from "$lib/buildPrompt";
7
+
8
/**
 * Zod schema describing the configuration accepted by the `cohere` endpoint.
 *
 * Fields:
 * - `weight`: positive integer, defaults to `1` (presumably used to balance
 *   between several endpoints of one model; the consumer is not in this file).
 * - `model`: accepted without validation (`z.any()`).
 * - `type`: discriminator, must be the literal `"cohere"`.
 * - `apiKey`: Cohere API token; falls back to the `COHERE_API_TOKEN`
 *   environment value when omitted.
 * - `raw`: when `true`, `endpointCohere` renders the prompt itself via
 *   `buildPrompt` and sends it with `rawPrompting`; defaults to `false`.
 */
export const endpointCohereParametersSchema = z.object({
	weight: z
		.number()
		.int()
		.positive()
		.default(1),
	model: z.any(),
	type: z.literal("cohere"),
	apiKey: z
		.string()
		.default(COHERE_API_TOKEN),
	raw: z
		.boolean()
		.default(false),
});
15
+
16
/**
 * Builds a chat-ui endpoint that streams completions from the Cohere chat API.
 *
 * @param input - Endpoint configuration, validated against
 *   `endpointCohereParametersSchema` (defaults are applied by the schema).
 * @returns An `Endpoint`: an async function that, given the conversation,
 *   returns an async generator of `TextGenerationStreamOutput`-shaped chunks.
 *   Every `text-generation` event becomes one token chunk; the `stream-end`
 *   event becomes a final special chunk carrying the full `generated_text`.
 * @throws Error if the `cohere-ai` module cannot be loaded or the client
 *   cannot be constructed (the original error is attached as `cause`).
 *   The returned generator throws if the conversation has no non-system
 *   message in non-raw mode, or if Cohere finishes the stream with
 *   `ERROR`, `ERROR_TOXIC` or `ERROR_LIMIT`.
 */
export async function endpointCohere(
	input: z.input<typeof endpointCohereParametersSchema>
): Promise<Endpoint> {
	const { apiKey, model, raw } = endpointCohereParametersSchema.parse(input);

	let cohere: CohereClient;

	try {
		// Only the types are imported at module level; the runtime module is
		// loaded on demand so a missing `cohere-ai` install surfaces here.
		cohere = new (await import("cohere-ai")).CohereClient({
			token: apiKey,
		});
	} catch (e) {
		throw new Error("Failed to import cohere-ai", { cause: e });
	}

	return async ({ messages, preprompt, generateSettings, continueMessage }) => {
		// A leading system message takes precedence over the configured preprompt.
		let system = preprompt;
		if (messages?.[0]?.from === "system") {
			system = messages[0].content;
		}

		// Per-request settings override the model's default parameters.
		const parameters = { ...model.parameters, ...generateSettings };

		// Options common to both request shapes (raw and structured chat).
		const baseRequest = {
			model: model.id ?? model.name,
			p: parameters?.top_p,
			k: parameters?.top_k,
			maxTokens: parameters?.max_new_tokens,
			temperature: parameters?.temperature,
			stopSequences: parameters?.stop,
			frequencyPenalty: parameters?.frequency_penalty,
		};

		return (async function* () {
			let stream;
			let tokenId = 0;

			if (raw) {
				// Raw mode: render the whole conversation with the model's own
				// prompt template and send it as a single raw prompt.
				const prompt = await buildPrompt({
					messages: messages.filter((message) => message.from !== "system"),
					model,
					preprompt: system,
					continueMessage,
				});

				stream = await cohere.chatStream({
					...baseRequest,
					message: prompt,
					rawPrompting: true,
				});
			} else {
				const formattedMessages = messages
					.filter((message) => message.from !== "system")
					.map((message) => ({
						role: message.from === "user" ? "USER" : "CHATBOT",
						message: message.content,
					})) satisfies Cohere.ChatMessage[];

				// The last message is sent as `message`; everything before it is history.
				const lastMessage = formattedMessages[formattedMessages.length - 1];
				if (!lastMessage) {
					throw new Error("Cohere endpoint requires at least one non-system message");
				}

				stream = await cohere.chatStream({
					...baseRequest,
					chatHistory: formattedMessages.slice(0, -1),
					message: lastMessage.message,
					preamble: system,
				});
			}

			for await (const output of stream) {
				if (output.eventType === "text-generation") {
					yield {
						token: {
							id: tokenId++,
							text: output.text,
							logprob: 0,
							special: false,
						},
						generated_text: null,
						details: null,
					} satisfies TextGenerationStreamOutput;
				} else if (output.eventType === "stream-end") {
					if (["ERROR", "ERROR_TOXIC", "ERROR_LIMIT"].includes(output.finishReason)) {
						throw new Error(output.finishReason);
					}
					// Final chunk: empty special token plus the complete response text.
					yield {
						token: {
							id: tokenId++,
							text: "",
							logprob: 0,
							special: true,
						},
						generated_text: output.response.text,
						details: null,
					};
				}
			}
		})();
	};
}
src/lib/server/endpoints/endpoints.ts CHANGED
@@ -16,6 +16,7 @@ import type { Model } from "$lib/types/Model";
16
  import endpointCloudflare, {
17
  endpointCloudflareParametersSchema,
18
  } from "./cloudflare/endpointCloudflare";
 
19
 
20
  // parameters passed when generating text
21
  export interface EndpointParameters {
@@ -46,6 +47,7 @@ export const endpoints = {
46
  ollama: endpointOllama,
47
  vertex: endpointVertex,
48
  cloudflare: endpointCloudflare,
 
49
  };
50
 
51
  export const endpointSchema = z.discriminatedUnion("type", [
@@ -57,5 +59,6 @@ export const endpointSchema = z.discriminatedUnion("type", [
57
  endpointOllamaParametersSchema,
58
  endpointVertexParametersSchema,
59
  endpointCloudflareParametersSchema,
 
60
  ]);
61
  export default endpoints;
 
16
  import endpointCloudflare, {
17
  endpointCloudflareParametersSchema,
18
  } from "./cloudflare/endpointCloudflare";
19
+ import { endpointCohere, endpointCohereParametersSchema } from "./cohere/endpointCohere";
20
 
21
  // parameters passed when generating text
22
  export interface EndpointParameters {
 
47
  ollama: endpointOllama,
48
  vertex: endpointVertex,
49
  cloudflare: endpointCloudflare,
50
+ cohere: endpointCohere,
51
  };
52
 
53
  export const endpointSchema = z.discriminatedUnion("type", [
 
59
  endpointOllamaParametersSchema,
60
  endpointVertexParametersSchema,
61
  endpointCloudflareParametersSchema,
62
+ endpointCohereParametersSchema,
63
  ]);
64
  export default endpoints;
src/lib/server/models.ts CHANGED
@@ -174,6 +174,8 @@ const addEndpoint = (m: Awaited<ReturnType<typeof processModel>>) => ({
174
  return await endpoints.vertex(args);
175
  case "cloudflare":
176
  return await endpoints.cloudflare(args);
 
 
177
  default:
178
  // for legacy reason
179
  return endpoints.tgi(args);
 
174
  return await endpoints.vertex(args);
175
  case "cloudflare":
176
  return await endpoints.cloudflare(args);
177
+ case "cohere":
178
+ return await endpoints.cohere(args);
179
  default:
180
  // for legacy reason
181
  return endpoints.tgi(args);