Add limits on API endpoints (#886)
Browse files
* Add limits on messages, conversations, assistants and messages/minute
* Add max message length limit
* remove rate limits from public config
* add `RATE_LIMITS` to secrets
* Add `MESSAGES_BEFORE_LOGIN` to secrets
* replace `RATE_LIMITS` by `USAGE_LIMITS`
* replace `RateLimits` by `usageLimits` and only get nEvents if needed
* rename schema too
* replace \r\n by \n
- .env +5 -2
- .env.template +0 -3
- .github/workflows/deploy-release.yml +2 -0
- scripts/updateProdEnv.ts +4 -0
- src/lib/server/usageLimits.ts +23 -0
- src/routes/+page.svelte +4 -3
- src/routes/conversation/+server.ts +11 -0
- src/routes/conversation/[id]/+page.svelte +1 -1
- src/routes/conversation/[id]/+server.ts +27 -9
- src/routes/settings/assistants/new/+page.server.ts +13 -0
.env
CHANGED
@@ -113,7 +113,7 @@ ADMIN_API_SECRET=# secret to admin API calls, like computing usage stats or expo
|
|
113 |
|
114 |
PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
|
115 |
|
116 |
-
RATE_LIMIT= #
|
117 |
MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
|
118 |
|
119 |
APP_BASE="" # base path of the app, e.g. /chat, left blank as default
|
@@ -140,4 +140,7 @@ ALTERNATIVE_REDIRECT_URLS=`[]` #valid alternative redirect URL for OAuth
|
|
140 |
|
141 |
WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported
|
142 |
|
143 |
-
ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
|
|
|
|
|
|
|
|
113 |
|
114 |
PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
|
115 |
|
116 |
+
RATE_LIMIT= # /!\ Legacy definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
|
117 |
MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
|
118 |
|
119 |
APP_BASE="" # base path of the app, e.g. /chat, left blank as default
|
|
|
140 |
|
141 |
WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported
|
142 |
|
143 |
+
ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
|
144 |
+
|
145 |
+
USAGE_LIMITS=`{}`
|
146 |
+
|
.env.template
CHANGED
@@ -269,9 +269,6 @@ PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with
|
|
269 |
PUBLIC_APP_DATA_SHARING=1
|
270 |
PUBLIC_APP_DISCLAIMER=1
|
271 |
|
272 |
-
RATE_LIMIT=16
|
273 |
-
MESSAGES_BEFORE_LOGIN=5# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
|
274 |
-
|
275 |
PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
|
276 |
PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"
|
277 |
|
|
|
269 |
PUBLIC_APP_DATA_SHARING=1
|
270 |
PUBLIC_APP_DISCLAIMER=1
|
271 |
|
|
|
|
|
|
|
272 |
PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
|
273 |
PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"
|
274 |
|
.github/workflows/deploy-release.yml
CHANGED
@@ -27,6 +27,8 @@ jobs:
|
|
27 |
HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
|
28 |
WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
|
29 |
ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
|
|
|
|
|
30 |
run: npm run updateProdEnv
|
31 |
sync-to-hub:
|
32 |
runs-on: ubuntu-latest
|
|
|
27 |
HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
|
28 |
WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
|
29 |
ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
|
30 |
+
USAGE_LIMITS: ${{ secrets.USAGE_LIMITS }}
|
31 |
+
MESSAGES_BEFORE_LOGIN: ${{ secrets.MESSAGES_BEFORE_LOGIN }}
|
32 |
run: npm run updateProdEnv
|
33 |
sync-to-hub:
|
34 |
runs-on: ubuntu-latest
|
scripts/updateProdEnv.ts
CHANGED
@@ -8,6 +8,8 @@ const MONGODB_URL = process.env.MONGODB_URL;
|
|
8 |
const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
|
9 |
const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
|
10 |
const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
|
|
|
|
|
11 |
|
12 |
// Read the content of the file .env.template
|
13 |
const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
|
@@ -20,6 +22,8 @@ SERPER_API_KEY=${SERPER_API_KEY}
|
|
20 |
HF_TOKEN=${HF_TOKEN}
|
21 |
WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
|
22 |
ADMIN_API_SECRET=${ADMIN_API_SECRET}
|
|
|
|
|
23 |
`;
|
24 |
|
25 |
// Make an HTTP POST request to add the space secrets
|
|
|
8 |
const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
|
9 |
const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
|
10 |
const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
|
11 |
+
const USAGE_LIMITS = process.env.USAGE_LIMITS;
|
12 |
+
const MESSAGES_BEFORE_LOGIN = process.env.MESSAGES_BEFORE_LOGIN;
|
13 |
|
14 |
// Read the content of the file .env.template
|
15 |
const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
|
|
|
22 |
HF_TOKEN=${HF_TOKEN}
|
23 |
WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
|
24 |
ADMIN_API_SECRET=${ADMIN_API_SECRET}
|
25 |
+
USAGE_LIMITS=${USAGE_LIMITS}
|
26 |
+
MESSAGES_BEFORE_LOGIN=${MESSAGES_BEFORE_LOGIN}
|
27 |
`;
|
28 |
|
29 |
// Make an HTTP POST request to add the space secrets
|
src/lib/server/usageLimits.ts
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { z } from "zod";
|
2 |
+
import { USAGE_LIMITS, RATE_LIMIT } from "$env/static/private";
|
3 |
+
import JSON5 from "json5";
|
4 |
+
|
5 |
+
// Schema for the USAGE_LIMITS secret; every limit is optional. RATE_LIMIT is the legacy way to define the messages-per-minute limit and is only used as a fallback when USAGE_LIMITS.messagesPerMinute is not set
|
6 |
+
export const usageLimitsSchema = z
|
7 |
+
.object({
|
8 |
+
conversations: z.coerce.number().optional(), // maximum number of conversations; checked when a new conversation is created
|
9 |
+
messages: z.coerce.number().optional(), // maximum number of messages in a single conversation
|
10 |
+
assistants: z.coerce.number().optional(), // maximum number of assistants; checked when a new assistant is created
|
11 |
+
messageLength: z.coerce.number().optional(), // maximum length of a message; longer messages are rejected with a 400 error, not truncated
|
12 |
+
messagesPerMinute: z
|
13 |
+
.preprocess((val) => {
|
14 |
+
if (val === undefined) {
|
15 |
+
return RATE_LIMIT; // fall back to the legacy env variable when the key is absent from USAGE_LIMITS
|
16 |
+
}
|
17 |
+
return val;
|
18 |
+
}, z.coerce.number().optional())
|
19 |
+
.optional(), // how many messages per minute
|
20 |
+
})
|
21 |
+
.optional();
|
22 |
+
|
23 |
+
export const usageLimits = usageLimitsSchema.parse(JSON5.parse(USAGE_LIMITS)); // evaluated once at module load; USAGE_LIMITS must be valid JSON5 (e.g. `{}`) or this throws
|
src/routes/+page.svelte
CHANGED
@@ -47,8 +47,9 @@
|
|
47 |
});
|
48 |
|
49 |
if (!res.ok) {
|
50 |
-
|
51 |
-
|
|
|
52 |
return;
|
53 |
}
|
54 |
|
@@ -63,7 +64,7 @@
|
|
63 |
// invalidateAll to update list of conversations
|
64 |
await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
|
65 |
} catch (err) {
|
66 |
-
error.set(ERROR_MESSAGES.default);
|
67 |
console.error(err);
|
68 |
} finally {
|
69 |
loading = false;
|
|
|
47 |
});
|
48 |
|
49 |
if (!res.ok) {
|
50 |
+
const errorMessage = (await res.json()).message || ERROR_MESSAGES.default;
|
51 |
+
error.set(errorMessage);
|
52 |
+
console.error("Error while creating conversation: ", errorMessage);
|
53 |
return;
|
54 |
}
|
55 |
|
|
|
64 |
// invalidateAll to update list of conversations
|
65 |
await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
|
66 |
} catch (err) {
|
67 |
+
error.set((err as Error).message || ERROR_MESSAGES.default);
|
68 |
console.error(err);
|
69 |
} finally {
|
70 |
loading = false;
|
src/routes/conversation/+server.ts
CHANGED
@@ -8,6 +8,8 @@ import type { Message } from "$lib/types/Message";
|
|
8 |
import { models, validateModel } from "$lib/server/models";
|
9 |
import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
|
10 |
import { v4 } from "uuid";
|
|
|
|
|
11 |
|
12 |
export const POST: RequestHandler = async ({ locals, request }) => {
|
13 |
const body = await request.text();
|
@@ -23,6 +25,15 @@ export const POST: RequestHandler = async ({ locals, request }) => {
|
|
23 |
})
|
24 |
.parse(JSON.parse(body));
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
let messages: Message[] = [
|
27 |
{
|
28 |
id: v4(),
|
|
|
8 |
import { models, validateModel } from "$lib/server/models";
|
9 |
import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
|
10 |
import { v4 } from "uuid";
|
11 |
+
import { authCondition } from "$lib/server/auth";
|
12 |
+
import { usageLimits } from "$lib/server/usageLimits";
|
13 |
|
14 |
export const POST: RequestHandler = async ({ locals, request }) => {
|
15 |
const body = await request.text();
|
|
|
25 |
})
|
26 |
.parse(JSON.parse(body));
|
27 |
|
28 |
+
const convCount = await collections.conversations.countDocuments(authCondition(locals));
|
29 |
+
|
30 |
+
if (usageLimits?.conversations && convCount >= usageLimits.conversations) { // >= : a user already holding the maximum must not be able to create one more
|
31 |
+
throw error(
|
32 |
+
429,
|
33 |
+
"You have reached the maximum number of conversations. Delete some to continue."
|
34 |
+
);
|
35 |
+
}
|
36 |
+
|
37 |
let messages: Message[] = [
|
38 |
{
|
39 |
id: v4(),
|
src/routes/conversation/[id]/+page.svelte
CHANGED
@@ -43,7 +43,7 @@
|
|
43 |
});
|
44 |
|
45 |
if (!res.ok) {
|
46 |
-
error.set(
|
47 |
console.error("Error while creating conversation: " + (await res.text()));
|
48 |
return;
|
49 |
}
|
|
|
43 |
});
|
44 |
|
45 |
if (!res.ok) {
|
46 |
+
error.set(await res.clone().text()); // read from a clone: a Response body can only be consumed once, and it is read again for the log line below
|
47 |
console.error("Error while creating conversation: " + (await res.text()));
|
48 |
return;
|
49 |
}
|
src/routes/conversation/[id]/+server.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import { MESSAGES_BEFORE_LOGIN
|
2 |
import { authCondition, requiresUser } from "$lib/server/auth";
|
3 |
import { collections } from "$lib/server/database";
|
4 |
import { models } from "$lib/server/models";
|
@@ -19,6 +19,7 @@ import { buildSubtree } from "$lib/utils/tree/buildSubtree.js";
|
|
19 |
import { addChildren } from "$lib/utils/tree/addChildren.js";
|
20 |
import { addSibling } from "$lib/utils/tree/addSibling.js";
|
21 |
import { preprocessMessages } from "$lib/server/preprocessMessages.js";
|
|
|
22 |
|
23 |
export async function POST({ request, locals, params, getClientAddress }) {
|
24 |
const id = z.string().parse(params.id);
|
@@ -95,14 +96,22 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
95 |
}
|
96 |
}
|
97 |
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
-
if (
|
105 |
-
throw error(
|
|
|
|
|
|
|
106 |
}
|
107 |
|
108 |
// fetch the model
|
@@ -125,7 +134,13 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
125 |
} = z
|
126 |
.object({
|
127 |
id: z.string().uuid().refine(isMessageId).optional(), // parent message id to append to for a normal message, or the message id for a retry/continue
|
128 |
-
inputs: z.optional(
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
is_retry: z.optional(z.boolean()),
|
130 |
is_continue: z.optional(z.boolean()),
|
131 |
web_search: z.optional(z.boolean()),
|
@@ -133,6 +148,9 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
133 |
})
|
134 |
.parse(json);
|
135 |
|
|
|
|
|
|
|
136 |
// files is an array of base64 strings encoding Blob objects
|
137 |
// we need to convert this array to an array of File objects
|
138 |
|
|
|
1 |
+
import { MESSAGES_BEFORE_LOGIN } from "$env/static/private";
|
2 |
import { authCondition, requiresUser } from "$lib/server/auth";
|
3 |
import { collections } from "$lib/server/database";
|
4 |
import { models } from "$lib/server/models";
|
|
|
19 |
import { addChildren } from "$lib/utils/tree/addChildren.js";
|
20 |
import { addSibling } from "$lib/utils/tree/addSibling.js";
|
21 |
import { preprocessMessages } from "$lib/server/preprocessMessages.js";
|
22 |
+
import { usageLimits } from "$lib/server/usageLimits";
|
23 |
|
24 |
export async function POST({ request, locals, params, getClientAddress }) {
|
25 |
const id = z.string().parse(params.id);
|
|
|
96 |
}
|
97 |
}
|
98 |
|
99 |
+
if (usageLimits?.messagesPerMinute) {
|
100 |
+
// check if the user is rate limited
|
101 |
+
const nEvents = Math.max(
|
102 |
+
await collections.messageEvents.countDocuments({ userId }),
|
103 |
+
await collections.messageEvents.countDocuments({ ip: getClientAddress() })
|
104 |
+
);
|
105 |
+
if (nEvents > usageLimits.messagesPerMinute) {
|
106 |
+
throw error(429, ERROR_MESSAGES.rateLimited);
|
107 |
+
}
|
108 |
+
}
|
109 |
|
110 |
+
if (usageLimits?.messages && conv.messages.length > usageLimits.messages) {
|
111 |
+
throw error(
|
112 |
+
429,
|
113 |
+
`This conversation has more than ${usageLimits.messages} messages. Start a new one to continue`
|
114 |
+
);
|
115 |
}
|
116 |
|
117 |
// fetch the model
|
|
|
134 |
} = z
|
135 |
.object({
|
136 |
id: z.string().uuid().refine(isMessageId).optional(), // parent message id to append to for a normal message, or the message id for a retry/continue
|
137 |
+
inputs: z.optional(
|
138 |
+
z
|
139 |
+
.string()
|
140 |
+
.trim()
|
141 |
+
.min(1)
|
142 |
+
.transform((s) => s.replace(/\r\n/g, "\n"))
|
143 |
+
),
|
144 |
is_retry: z.optional(z.boolean()),
|
145 |
is_continue: z.optional(z.boolean()),
|
146 |
web_search: z.optional(z.boolean()),
|
|
|
148 |
})
|
149 |
.parse(json);
|
150 |
|
151 |
+
if (usageLimits?.messageLength && (newPrompt?.length ?? 0) > usageLimits.messageLength) {
|
152 |
+
throw error(400, "Message too long.");
|
153 |
+
}
|
154 |
// files is an array of base64 strings encoding Blob objects
|
155 |
// we need to convert this array to an array of File objects
|
156 |
|
src/routes/settings/assistants/new/+page.server.ts
CHANGED
@@ -7,6 +7,7 @@ import { ObjectId } from "mongodb";
|
|
7 |
import { z } from "zod";
|
8 |
import { sha256 } from "$lib/utils/sha256";
|
9 |
import sharp from "sharp";
|
|
|
10 |
import { generateSearchTokens } from "$lib/utils/searchTokens";
|
11 |
|
12 |
const newAsssistantSchema = z.object({
|
@@ -62,6 +63,18 @@ export const actions: Actions = {
|
|
62 |
return fail(400, { error: true, errors });
|
63 |
}
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
const createdById = locals.user?._id ?? locals.sessionId;
|
66 |
|
67 |
const newAssistantId = new ObjectId();
|
|
|
7 |
import { z } from "zod";
|
8 |
import { sha256 } from "$lib/utils/sha256";
|
9 |
import sharp from "sharp";
|
10 |
+
import { usageLimits } from "$lib/server/usageLimits";
|
11 |
import { generateSearchTokens } from "$lib/utils/searchTokens";
|
12 |
|
13 |
const newAsssistantSchema = z.object({
|
|
|
63 |
return fail(400, { error: true, errors });
|
64 |
}
|
65 |
|
66 |
+
const assistantsCount = await collections.assistants.countDocuments(authCondition(locals));
|
67 |
+
|
68 |
+
if (usageLimits?.assistants && assistantsCount >= usageLimits.assistants) { // >= : a user already holding the maximum must not be able to create one more
|
69 |
+
const errors = [
|
70 |
+
{
|
71 |
+
field: "preprompt",
|
72 |
+
message: "You have reached the maximum number of assistants. Delete some to continue.",
|
73 |
+
},
|
74 |
+
];
|
75 |
+
return fail(400, { error: true, errors });
|
76 |
+
}
|
77 |
+
|
78 |
const createdById = locals.user?._id ?? locals.sessionId;
|
79 |
|
80 |
const newAssistantId = new ObjectId();
|