nsarrazin HF staff committed on
Commit
537b6f5
1 Parent(s): 21c9b41

Add limits on API endpoints (#886)

Browse files

* Add limits on messages, conversations, assistants and messages/minute

* Add max message length limit

* remove rate limits from public config

* add `RATE_LIMITS` to secrets

* Add `MESSAGES_BEFORE_LOGIN` to secrets

* replace `RATE_LIMITS` by `USAGE_LIMITS`

* replace `RateLimits` by `usageLimits` and only get nEvents if needed

* rename schema too

* replace \r\n by \n

.env CHANGED
@@ -113,7 +113,7 @@ ADMIN_API_SECRET=# secret to admin API calls, like computing usage stats or expo
113
 
114
  PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
115
 
116
- RATE_LIMIT= # requests per minute
117
  MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
118
 
119
  APP_BASE="" # base path of the app, e.g. /chat, left blank as default
@@ -140,4 +140,7 @@ ALTERNATIVE_REDIRECT_URLS=`[]` #valid alternative redirect URLs for OAuth
140
 
141
  WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported
142
 
143
- ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
 
 
 
113
 
114
  PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
115
 
116
+ RATE_LIMIT= # /!\ Legacy definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
117
  MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
118
 
119
  APP_BASE="" # base path of the app, e.g. /chat, left blank as default
140
 
141
  WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported
142
 
143
+ ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
144
+
145
+ USAGE_LIMITS=`{}`
146
+
.env.template CHANGED
@@ -269,9 +269,6 @@ PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with
269
  PUBLIC_APP_DATA_SHARING=1
270
  PUBLIC_APP_DISCLAIMER=1
271
 
272
- RATE_LIMIT=16
273
- MESSAGES_BEFORE_LOGIN=5# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
274
-
275
  PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
276
  PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"
277
 
269
  PUBLIC_APP_DATA_SHARING=1
270
  PUBLIC_APP_DISCLAIMER=1
271
 
 
 
 
272
  PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
273
  PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"
274
 
.github/workflows/deploy-release.yml CHANGED
@@ -27,6 +27,8 @@ jobs:
27
  HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
28
  WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
29
  ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
 
 
30
  run: npm run updateProdEnv
31
  sync-to-hub:
32
  runs-on: ubuntu-latest
27
  HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
28
  WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
29
  ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
30
+ USAGE_LIMITS: ${{ secrets.USAGE_LIMITS }}
31
+ MESSAGES_BEFORE_LOGIN: ${{ secrets.MESSAGES_BEFORE_LOGIN }}
32
  run: npm run updateProdEnv
33
  sync-to-hub:
34
  runs-on: ubuntu-latest
scripts/updateProdEnv.ts CHANGED
@@ -8,6 +8,8 @@ const MONGODB_URL = process.env.MONGODB_URL;
8
  const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
9
  const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
10
  const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
 
 
11
 
12
  // Read the content of the file .env.template
13
  const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
@@ -20,6 +22,8 @@ SERPER_API_KEY=${SERPER_API_KEY}
20
  HF_TOKEN=${HF_TOKEN}
21
  WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
22
  ADMIN_API_SECRET=${ADMIN_API_SECRET}
 
 
23
  `;
24
 
25
  // Make an HTTP POST request to add the space secrets
8
  const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
9
  const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
10
  const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
11
+ const USAGE_LIMITS = process.env.USAGE_LIMITS;
12
+ const MESSAGES_BEFORE_LOGIN = process.env.MESSAGES_BEFORE_LOGIN;
13
 
14
  // Read the content of the file .env.template
15
  const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
22
  HF_TOKEN=${HF_TOKEN}
23
  WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
24
  ADMIN_API_SECRET=${ADMIN_API_SECRET}
25
+ USAGE_LIMITS=${USAGE_LIMITS}
26
+ MESSAGES_BEFORE_LOGIN=${MESSAGES_BEFORE_LOGIN}
27
  `;
28
 
29
  // Make an HTTP POST request to add the space secrets
src/lib/server/usageLimits.ts ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { z } from "zod";
2
+ import { USAGE_LIMITS, RATE_LIMIT } from "$env/static/private";
3
+ import JSON5 from "json5";
4
+
5
+ // RATE_LIMIT is the legacy env var for the per-minute message limit; it is the fallback when USAGE_LIMITS does not set messagesPerMinute
6
+ export const usageLimitsSchema = z
7
+ .object({
8
+ conversations: z.coerce.number().optional(), // max number of conversations; checked when a new conversation is created
9
+ messages: z.coerce.number().optional(), // max number of messages in a single conversation
10
+ assistants: z.coerce.number().optional(), // max number of assistants; checked when a new assistant is created
11
+ messageLength: z.coerce.number().optional(), // max length of a message; longer messages are rejected ("Message too long."), not truncated
12
+ messagesPerMinute: z
13
+ .preprocess((val) => { // val is the raw messagesPerMinute entry, before coercion to a number
14
+ if (val === undefined) {
15
+ return RATE_LIMIT; // not set in USAGE_LIMITS: fall back to the legacy RATE_LIMIT value
16
+ }
17
+ return val;
18
+ }, z.coerce.number().optional())
19
+ .optional(), // max number of messages per minute
20
+ })
21
+ .optional(); // the schema also accepts undefined, i.e. no limits object at all
22
+
23
+ export const usageLimits = usageLimitsSchema.parse(JSON5.parse(USAGE_LIMITS)); // USAGE_LIMITS is read as JSON5 and validated against the schema above
src/routes/+page.svelte CHANGED
@@ -47,8 +47,9 @@
47
  });
48
 
49
  if (!res.ok) {
50
- error.set("Error while creating conversation, try again.");
51
- console.error("Error while creating conversation: " + (await res.text()));
 
52
  return;
53
  }
54
 
@@ -63,7 +64,7 @@
63
  // invalidateAll to update list of conversations
64
  await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
65
  } catch (err) {
66
- error.set(ERROR_MESSAGES.default);
67
  console.error(err);
68
  } finally {
69
  loading = false;
47
  });
48
 
49
  if (!res.ok) {
50
+ const errorMessage = (await res.json()).message || ERROR_MESSAGES.default;
51
+ error.set(errorMessage);
52
+ console.error("Error while creating conversation: ", errorMessage);
53
  return;
54
  }
55
 
64
  // invalidateAll to update list of conversations
65
  await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
66
  } catch (err) {
67
+ error.set((err as Error).message || ERROR_MESSAGES.default);
68
  console.error(err);
69
  } finally {
70
  loading = false;
src/routes/conversation/+server.ts CHANGED
@@ -8,6 +8,8 @@ import type { Message } from "$lib/types/Message";
8
  import { models, validateModel } from "$lib/server/models";
9
  import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
10
  import { v4 } from "uuid";
 
 
11
 
12
  export const POST: RequestHandler = async ({ locals, request }) => {
13
  const body = await request.text();
@@ -23,6 +25,15 @@ export const POST: RequestHandler = async ({ locals, request }) => {
23
  })
24
  .parse(JSON.parse(body));
25
 
 
 
 
 
 
 
 
 
 
26
  let messages: Message[] = [
27
  {
28
  id: v4(),
8
  import { models, validateModel } from "$lib/server/models";
9
  import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
10
  import { v4 } from "uuid";
11
+ import { authCondition } from "$lib/server/auth";
12
+ import { usageLimits } from "$lib/server/usageLimits";
13
 
14
  export const POST: RequestHandler = async ({ locals, request }) => {
15
  const body = await request.text();
25
  })
26
  .parse(JSON.parse(body));
27
 
28
+ const convCount = await collections.conversations.countDocuments(authCondition(locals));
29
+
30
+ if (usageLimits?.conversations && convCount > usageLimits?.conversations) {
31
+ throw error(
32
+ 429,
33
+ "You have reached the maximum number of conversations. Delete some to continue."
34
+ );
35
+ }
36
+
37
  let messages: Message[] = [
38
  {
39
  id: v4(),
src/routes/conversation/[id]/+page.svelte CHANGED
@@ -43,7 +43,7 @@
43
  });
44
 
45
  if (!res.ok) {
46
- error.set("Error while creating conversation, try again.");
47
  console.error("Error while creating conversation: " + (await res.text()));
48
  return;
49
  }
43
  });
44
 
45
  if (!res.ok) {
46
+ error.set(await res.text());
47
  console.error("Error while creating conversation: " + (await res.text()));
48
  return;
49
  }
src/routes/conversation/[id]/+server.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
2
  import { authCondition, requiresUser } from "$lib/server/auth";
3
  import { collections } from "$lib/server/database";
4
  import { models } from "$lib/server/models";
@@ -19,6 +19,7 @@ import { buildSubtree } from "$lib/utils/tree/buildSubtree.js";
19
  import { addChildren } from "$lib/utils/tree/addChildren.js";
20
  import { addSibling } from "$lib/utils/tree/addSibling.js";
21
  import { preprocessMessages } from "$lib/server/preprocessMessages.js";
 
22
 
23
  export async function POST({ request, locals, params, getClientAddress }) {
24
  const id = z.string().parse(params.id);
@@ -95,14 +96,22 @@ export async function POST({ request, locals, params, getClientAddress }) {
95
  }
96
  }
97
 
98
- // check if the user is rate limited
99
- const nEvents = Math.max(
100
- await collections.messageEvents.countDocuments({ userId }),
101
- await collections.messageEvents.countDocuments({ ip: getClientAddress() })
102
- );
 
 
 
 
 
103
 
104
- if (RATE_LIMIT != "" && nEvents > parseInt(RATE_LIMIT)) {
105
- throw error(429, ERROR_MESSAGES.rateLimited);
 
 
 
106
  }
107
 
108
  // fetch the model
@@ -125,7 +134,13 @@ export async function POST({ request, locals, params, getClientAddress }) {
125
  } = z
126
  .object({
127
  id: z.string().uuid().refine(isMessageId).optional(), // parent message id to append to for a normal message, or the message id for a retry/continue
128
- inputs: z.optional(z.string().trim().min(1)),
 
 
 
 
 
 
129
  is_retry: z.optional(z.boolean()),
130
  is_continue: z.optional(z.boolean()),
131
  web_search: z.optional(z.boolean()),
@@ -133,6 +148,9 @@ export async function POST({ request, locals, params, getClientAddress }) {
133
  })
134
  .parse(json);
135
 
 
 
 
136
  // files is an array of base64 strings encoding Blob objects
137
  // we need to convert this array to an array of File objects
138
 
1
+ import { MESSAGES_BEFORE_LOGIN } from "$env/static/private";
2
  import { authCondition, requiresUser } from "$lib/server/auth";
3
  import { collections } from "$lib/server/database";
4
  import { models } from "$lib/server/models";
19
  import { addChildren } from "$lib/utils/tree/addChildren.js";
20
  import { addSibling } from "$lib/utils/tree/addSibling.js";
21
  import { preprocessMessages } from "$lib/server/preprocessMessages.js";
22
+ import { usageLimits } from "$lib/server/usageLimits";
23
 
24
  export async function POST({ request, locals, params, getClientAddress }) {
25
  const id = z.string().parse(params.id);
96
  }
97
  }
98
 
99
+ if (usageLimits?.messagesPerMinute) {
100
+ // check if the user is rate limited
101
+ const nEvents = Math.max(
102
+ await collections.messageEvents.countDocuments({ userId }),
103
+ await collections.messageEvents.countDocuments({ ip: getClientAddress() })
104
+ );
105
+ if (nEvents > usageLimits.messagesPerMinute) {
106
+ throw error(429, ERROR_MESSAGES.rateLimited);
107
+ }
108
+ }
109
 
110
+ if (usageLimits?.messages && conv.messages.length > usageLimits.messages) {
111
+ throw error(
112
+ 429,
113
+ `This conversation has more than ${usageLimits.messages} messages. Start a new one to continue`
114
+ );
115
  }
116
 
117
  // fetch the model
134
  } = z
135
  .object({
136
  id: z.string().uuid().refine(isMessageId).optional(), // parent message id to append to for a normal message, or the message id for a retry/continue
137
+ inputs: z.optional(
138
+ z
139
+ .string()
140
+ .trim()
141
+ .min(1)
142
+ .transform((s) => s.replace(/\r\n/g, "\n"))
143
+ ),
144
  is_retry: z.optional(z.boolean()),
145
  is_continue: z.optional(z.boolean()),
146
  web_search: z.optional(z.boolean()),
148
  })
149
  .parse(json);
150
 
151
+ if (usageLimits?.messageLength && (newPrompt?.length ?? 0) > usageLimits.messageLength) {
152
+ throw error(400, "Message too long.");
153
+ }
154
  // files is an array of base64 strings encoding Blob objects
155
  // we need to convert this array to an array of File objects
156
 
src/routes/settings/assistants/new/+page.server.ts CHANGED
@@ -7,6 +7,7 @@ import { ObjectId } from "mongodb";
7
  import { z } from "zod";
8
  import { sha256 } from "$lib/utils/sha256";
9
  import sharp from "sharp";
 
10
  import { generateSearchTokens } from "$lib/utils/searchTokens";
11
 
12
  const newAsssistantSchema = z.object({
@@ -62,6 +63,18 @@ export const actions: Actions = {
62
  return fail(400, { error: true, errors });
63
  }
64
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  const createdById = locals.user?._id ?? locals.sessionId;
66
 
67
  const newAssistantId = new ObjectId();
7
  import { z } from "zod";
8
  import { sha256 } from "$lib/utils/sha256";
9
  import sharp from "sharp";
10
+ import { usageLimits } from "$lib/server/usageLimits";
11
  import { generateSearchTokens } from "$lib/utils/searchTokens";
12
 
13
  const newAsssistantSchema = z.object({
63
  return fail(400, { error: true, errors });
64
  }
65
 
66
+ const assistantsCount = await collections.assistants.countDocuments(authCondition(locals));
67
+
68
+ if (usageLimits?.assistants && assistantsCount > usageLimits.assistants) {
69
+ const errors = [
70
+ {
71
+ field: "preprompt",
72
+ message: "You have reached the maximum number of assistants. Delete some to continue.",
73
+ },
74
+ ];
75
+ return fail(400, { error: true, errors });
76
+ }
77
+
78
  const createdById = locals.user?._id ?? locals.sessionId;
79
 
80
  const newAssistantId = new ObjectId();