✨ Add stats on conversations (#828)
Browse files
* ✨ Add stats on conversations
* 🩹 Fix conversation stats compute
* ⚡️ Slightly better index
* 🔊
* 🩹 Some fixes
* 🐛 Fix aggregation query
* 🐛 Fix $merge stage of query
* 🐛 Fix query on sessionId
* ✨ Compute weekly/monthly unique users
* 🩹 Final tweaks
* ⚡️ Split aggregations for better perf
No need to recompute DAUs / WAUs and so on
* ♻️ Deprecate PARQUET_EXPORT_SECRET & fix sec vuln
* Add ADMIN_API_SECRET to CD action
---------
Co-authored-by: Nathan Sarrazin <sarrazin.nathan@gmail.com>
- .env +3 -1
- .github/workflows/deploy-release.yml +1 -0
- package-lock.json +26 -9
- scripts/updateProdEnv.ts +2 -0
- src/hooks.server.ts +19 -1
- src/lib/server/database.ts +32 -0
- src/lib/types/ConversationStats.ts +13 -0
- src/routes/admin/export/+server.ts +3 -11
- src/routes/admin/stats/compute/+server.ts +217 -0
.env
CHANGED
@@ -109,7 +109,9 @@ PUBLIC_ANNOUNCEMENT_BANNERS=`[
|
|
109 |
|
110 |
PARQUET_EXPORT_DATASET=
|
111 |
PARQUET_EXPORT_HF_TOKEN=
|
112 |
-
|
|
|
|
|
113 |
|
114 |
RATE_LIMIT= # requests per minute
|
115 |
MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
|
|
|
109 |
|
110 |
PARQUET_EXPORT_DATASET=
|
111 |
PARQUET_EXPORT_HF_TOKEN=
|
112 |
+
ADMIN_API_SECRET=# secret used to authenticate admin API calls, like computing usage stats or exporting parquet data
|
113 |
+
|
114 |
+
PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
|
115 |
|
116 |
RATE_LIMIT= # requests per minute
|
117 |
MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
|
.github/workflows/deploy-release.yml
CHANGED
@@ -26,6 +26,7 @@ jobs:
|
|
26 |
MONGODB_URL: ${{ secrets.MONGODB_URL }}
|
27 |
HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
|
28 |
WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
|
|
|
29 |
run: npm run updateProdEnv
|
30 |
sync-to-hub:
|
31 |
runs-on: ubuntu-latest
|
|
|
26 |
MONGODB_URL: ${{ secrets.MONGODB_URL }}
|
27 |
HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
|
28 |
WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
|
29 |
+
ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
|
30 |
run: npm run updateProdEnv
|
31 |
sync-to-hub:
|
32 |
runs-on: ubuntu-latest
|
package-lock.json
CHANGED
@@ -4093,10 +4093,17 @@
|
|
4093 |
"resolved": "https://registry.npmjs.org/int53/-/int53-0.2.4.tgz",
|
4094 |
"integrity": "sha512-a5jlKftS7HUOhkUyYD7j2sJ/ZnvWiNlZS1ldR+g1ifQ+/UuZXIE+YTc/lK1qGj/GwAU5F8Z0e1eVq2t1J5Ob2g=="
|
4095 |
},
|
4096 |
-
"node_modules/ip": {
|
4097 |
-
"version": "
|
4098 |
-
"resolved": "https://registry.npmjs.org/ip/-/ip-
|
4099 |
-
"integrity": "sha512-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4100 |
},
|
4101 |
"node_modules/is-arrayish": {
|
4102 |
"version": "0.3.2",
|
@@ -4267,6 +4274,11 @@
|
|
4267 |
"js-yaml": "bin/js-yaml.js"
|
4268 |
}
|
4269 |
},
|
|
|
|
|
|
|
|
|
|
|
4270 |
"node_modules/jsdom": {
|
4271 |
"version": "22.0.0",
|
4272 |
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-22.0.0.tgz",
|
@@ -6294,15 +6306,15 @@
|
|
6294 |
"integrity": "sha512-YIK6I2lsH072UE0aOFxxY1dPDCS43I5ktqHpeAsuLNYWkE5pGxRGWfDM4/vSUfNzXjC1Ivzt3qx31PCLmc9yqg=="
|
6295 |
},
|
6296 |
"node_modules/socks": {
|
6297 |
-
"version": "2.7.
|
6298 |
-
"resolved": "https://registry.npmjs.org/socks/-/socks-2.7.
|
6299 |
-
"integrity": "sha512-
|
6300 |
"dependencies": {
|
6301 |
-
"ip": "^
|
6302 |
"smart-buffer": "^4.2.0"
|
6303 |
},
|
6304 |
"engines": {
|
6305 |
-
"node": ">= 10.
|
6306 |
"npm": ">= 3.0.0"
|
6307 |
}
|
6308 |
},
|
@@ -6345,6 +6357,11 @@
|
|
6345 |
"memory-pager": "^1.0.2"
|
6346 |
}
|
6347 |
},
|
|
|
|
|
|
|
|
|
|
|
6348 |
"node_modules/stackback": {
|
6349 |
"version": "0.0.2",
|
6350 |
"resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz",
|
|
|
4093 |
"resolved": "https://registry.npmjs.org/int53/-/int53-0.2.4.tgz",
|
4094 |
"integrity": "sha512-a5jlKftS7HUOhkUyYD7j2sJ/ZnvWiNlZS1ldR+g1ifQ+/UuZXIE+YTc/lK1qGj/GwAU5F8Z0e1eVq2t1J5Ob2g=="
|
4095 |
},
|
4096 |
+
"node_modules/ip-address": {
|
4097 |
+
"version": "9.0.5",
|
4098 |
+
"resolved": "https://registry.npmjs.org/ip-address/-/ip-address-9.0.5.tgz",
|
4099 |
+
"integrity": "sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==",
|
4100 |
+
"dependencies": {
|
4101 |
+
"jsbn": "1.1.0",
|
4102 |
+
"sprintf-js": "^1.1.3"
|
4103 |
+
},
|
4104 |
+
"engines": {
|
4105 |
+
"node": ">= 12"
|
4106 |
+
}
|
4107 |
},
|
4108 |
"node_modules/is-arrayish": {
|
4109 |
"version": "0.3.2",
|
|
|
4274 |
"js-yaml": "bin/js-yaml.js"
|
4275 |
}
|
4276 |
},
|
4277 |
+
"node_modules/jsbn": {
|
4278 |
+
"version": "1.1.0",
|
4279 |
+
"resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz",
|
4280 |
+
"integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A=="
|
4281 |
+
},
|
4282 |
"node_modules/jsdom": {
|
4283 |
"version": "22.0.0",
|
4284 |
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-22.0.0.tgz",
|
|
|
6306 |
"integrity": "sha512-YIK6I2lsH072UE0aOFxxY1dPDCS43I5ktqHpeAsuLNYWkE5pGxRGWfDM4/vSUfNzXjC1Ivzt3qx31PCLmc9yqg=="
|
6307 |
},
|
6308 |
"node_modules/socks": {
|
6309 |
+
"version": "2.7.3",
|
6310 |
+
"resolved": "https://registry.npmjs.org/socks/-/socks-2.7.3.tgz",
|
6311 |
+
"integrity": "sha512-vfuYK48HXCTFD03G/1/zkIls3Ebr2YNa4qU9gHDZdblHLiqhJrJGkY3+0Nx0JpN9qBhJbVObc1CNciT1bIZJxw==",
|
6312 |
"dependencies": {
|
6313 |
+
"ip-address": "^9.0.5",
|
6314 |
"smart-buffer": "^4.2.0"
|
6315 |
},
|
6316 |
"engines": {
|
6317 |
+
"node": ">= 10.0.0",
|
6318 |
"npm": ">= 3.0.0"
|
6319 |
}
|
6320 |
},
|
|
|
6357 |
"memory-pager": "^1.0.2"
|
6358 |
}
|
6359 |
},
|
6360 |
+
"node_modules/sprintf-js": {
|
6361 |
+
"version": "1.1.3",
|
6362 |
+
"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz",
|
6363 |
+
"integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="
|
6364 |
+
},
|
6365 |
"node_modules/stackback": {
|
6366 |
"version": "0.0.2",
|
6367 |
"resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz",
|
scripts/updateProdEnv.ts
CHANGED
@@ -7,6 +7,7 @@ const OPENID_CONFIG = process.env.OPENID_CONFIG;
|
|
7 |
const MONGODB_URL = process.env.MONGODB_URL;
|
8 |
const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
|
9 |
const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
|
|
|
10 |
|
11 |
// Read the content of the file .env.template
|
12 |
const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
|
@@ -18,6 +19,7 @@ OPENID_CONFIG=${OPENID_CONFIG}
|
|
18 |
SERPER_API_KEY=${SERPER_API_KEY}
|
19 |
HF_TOKEN=${HF_TOKEN}
|
20 |
WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
|
|
|
21 |
`;
|
22 |
|
23 |
// Make an HTTP POST request to add the space secrets
|
|
|
7 |
const MONGODB_URL = process.env.MONGODB_URL;
|
8 |
const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
|
9 |
const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
|
10 |
+
const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
|
11 |
|
12 |
// Read the content of the file .env.template
|
13 |
const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
|
|
|
19 |
SERPER_API_KEY=${SERPER_API_KEY}
|
20 |
HF_TOKEN=${HF_TOKEN}
|
21 |
WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
|
22 |
+
ADMIN_API_SECRET=${ADMIN_API_SECRET}
|
23 |
`;
|
24 |
|
25 |
// Make an HTTP POST request to add the space secrets
|
src/hooks.server.ts
CHANGED
@@ -1,4 +1,10 @@
|
|
1 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import type { Handle } from "@sveltejs/kit";
|
3 |
import {
|
4 |
PUBLIC_GOOGLE_ANALYTICS_ID,
|
@@ -29,6 +35,18 @@ export const handle: Handle = async ({ event, resolve }) => {
|
|
29 |
});
|
30 |
}
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
const token = event.cookies.get(COOKIE_NAME);
|
33 |
|
34 |
let secretSessionId: string;
|
|
|
1 |
+
import {
|
2 |
+
ADMIN_API_SECRET,
|
3 |
+
COOKIE_NAME,
|
4 |
+
EXPOSE_API,
|
5 |
+
MESSAGES_BEFORE_LOGIN,
|
6 |
+
PARQUET_EXPORT_SECRET,
|
7 |
+
} from "$env/static/private";
|
8 |
import type { Handle } from "@sveltejs/kit";
|
9 |
import {
|
10 |
PUBLIC_GOOGLE_ANALYTICS_ID,
|
|
|
35 |
});
|
36 |
}
|
37 |
|
38 |
+
if (event.url.pathname.startsWith(`${base}/admin/`) || event.url.pathname === `${base}/admin`) {
|
39 |
+
const ADMIN_SECRET = ADMIN_API_SECRET || PARQUET_EXPORT_SECRET;
|
40 |
+
|
41 |
+
if (!ADMIN_SECRET) {
|
42 |
+
return errorResponse(500, "Admin API is not configured");
|
43 |
+
}
|
44 |
+
|
45 |
+
if (event.request.headers.get("Authorization") !== `Bearer ${ADMIN_SECRET}`) {
|
46 |
+
return errorResponse(401, "Unauthorized");
|
47 |
+
}
|
48 |
+
}
|
49 |
+
|
50 |
const token = event.cookies.get(COOKIE_NAME);
|
51 |
|
52 |
let secretSessionId: string;
|
src/lib/server/database.ts
CHANGED
@@ -9,6 +9,7 @@ import type { MessageEvent } from "$lib/types/MessageEvent";
|
|
9 |
import type { Session } from "$lib/types/Session";
|
10 |
import type { Assistant } from "$lib/types/Assistant";
|
11 |
import type { Report } from "$lib/types/Report";
|
|
|
12 |
|
13 |
if (!MONGODB_URL) {
|
14 |
throw new Error(
|
@@ -24,7 +25,10 @@ export const connectPromise = client.connect().catch(console.error);
|
|
24 |
|
25 |
const db = client.db(MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : ""));
|
26 |
|
|
|
|
|
27 |
const conversations = db.collection<Conversation>("conversations");
|
|
|
28 |
const assistants = db.collection<Assistant>("assistants");
|
29 |
const reports = db.collection<Report>("reports");
|
30 |
const sharedConversations = db.collection<SharedConversation>("sharedConversations");
|
@@ -38,6 +42,7 @@ const bucket = new GridFSBucket(db, { bucketName: "files" });
|
|
38 |
export { client, db };
|
39 |
export const collections = {
|
40 |
conversations,
|
|
|
41 |
assistants,
|
42 |
reports,
|
43 |
sharedConversations,
|
@@ -68,6 +73,33 @@ client.on("open", () => {
|
|
68 |
{ partialFilterExpression: { userId: { $exists: true } } }
|
69 |
)
|
70 |
.catch(console.error);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
abortedGenerations.createIndex({ updatedAt: 1 }, { expireAfterSeconds: 30 }).catch(console.error);
|
72 |
abortedGenerations.createIndex({ conversationId: 1 }, { unique: true }).catch(console.error);
|
73 |
sharedConversations.createIndex({ hash: 1 }, { unique: true }).catch(console.error);
|
|
|
9 |
import type { Session } from "$lib/types/Session";
|
10 |
import type { Assistant } from "$lib/types/Assistant";
|
11 |
import type { Report } from "$lib/types/Report";
|
12 |
+
import type { ConversationStats } from "$lib/types/ConversationStats";
|
13 |
|
14 |
if (!MONGODB_URL) {
|
15 |
throw new Error(
|
|
|
25 |
|
26 |
const db = client.db(MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : ""));
|
27 |
|
28 |
+
export const CONVERSATION_STATS_COLLECTION = "conversations.stats";
|
29 |
+
|
30 |
const conversations = db.collection<Conversation>("conversations");
|
31 |
+
const conversationStats = db.collection<ConversationStats>(CONVERSATION_STATS_COLLECTION);
|
32 |
const assistants = db.collection<Assistant>("assistants");
|
33 |
const reports = db.collection<Report>("reports");
|
34 |
const sharedConversations = db.collection<SharedConversation>("sharedConversations");
|
|
|
42 |
export { client, db };
|
43 |
export const collections = {
|
44 |
conversations,
|
45 |
+
conversationStats,
|
46 |
assistants,
|
47 |
reports,
|
48 |
sharedConversations,
|
|
|
73 |
{ partialFilterExpression: { userId: { $exists: true } } }
|
74 |
)
|
75 |
.catch(console.error);
|
76 |
+
// To do stats on conversations
|
77 |
+
conversations.createIndex({ updatedAt: 1 }).catch(console.error);
|
78 |
+
// Not strictly necessary, could use _id, but more convenient. Also for stats
|
79 |
+
conversations.createIndex({ createdAt: 1 }).catch(console.error);
|
80 |
+
// To do stats on conversation messages
|
81 |
+
conversations.createIndex({ "messages.createdAt": 1 }, { sparse: true }).catch(console.error);
|
82 |
+
// Unique index for stats
|
83 |
+
conversationStats
|
84 |
+
.createIndex(
|
85 |
+
{
|
86 |
+
type: 1,
|
87 |
+
"date.field": 1,
|
88 |
+
"date.span": 1,
|
89 |
+
"date.at": 1,
|
90 |
+
distinct: 1,
|
91 |
+
},
|
92 |
+
{ unique: true }
|
93 |
+
)
|
94 |
+
.catch(console.error);
|
95 |
+
// Allow easy check of last computed stat for given type/dateField
|
96 |
+
conversationStats
|
97 |
+
.createIndex({
|
98 |
+
type: 1,
|
99 |
+
"date.field": 1,
|
100 |
+
"date.at": 1,
|
101 |
+
})
|
102 |
+
.catch(console.error);
|
103 |
abortedGenerations.createIndex({ updatedAt: 1 }, { expireAfterSeconds: 30 }).catch(console.error);
|
104 |
abortedGenerations.createIndex({ conversationId: 1 }, { unique: true }).catch(console.error);
|
105 |
sharedConversations.createIndex({ hash: 1 }, { unique: true }).catch(console.error);
|
src/lib/types/ConversationStats.ts
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { Timestamps } from "./Timestamps";
|
2 |
+
|
3 |
+
export interface ConversationStats extends Timestamps {
|
4 |
+
date: {
|
5 |
+
at: Date;
|
6 |
+
span: "day" | "week" | "month";
|
7 |
+
field: "updatedAt" | "createdAt";
|
8 |
+
};
|
9 |
+
type: "conversation" | "message";
|
10 |
+
/** _id => number of conversations/messages in the span (day, week or month) */
|
11 |
+
distinct: "sessionId" | "userId" | "userOrSessionId" | "_id";
|
12 |
+
count: number;
|
13 |
+
}
|
src/routes/admin/export/+server.ts
CHANGED
@@ -1,8 +1,4 @@
|
|
1 |
-
import {
|
2 |
-
PARQUET_EXPORT_DATASET,
|
3 |
-
PARQUET_EXPORT_HF_TOKEN,
|
4 |
-
PARQUET_EXPORT_SECRET,
|
5 |
-
} from "$env/static/private";
|
6 |
import { collections } from "$lib/server/database";
|
7 |
import type { Message } from "$lib/types/Message";
|
8 |
import { error } from "@sveltejs/kit";
|
@@ -13,17 +9,13 @@ import parquet from "parquetjs";
|
|
13 |
import { z } from "zod";
|
14 |
|
15 |
// Trigger like this:
|
16 |
-
// curl -X POST "http://localhost:5173/chat/admin/export" -H "Authorization: Bearer <
|
17 |
|
18 |
export async function POST({ request }) {
|
19 |
-
if (!
|
20 |
throw error(500, "Parquet export is not configured.");
|
21 |
}
|
22 |
|
23 |
-
if (request.headers.get("Authorization") !== `Bearer ${PARQUET_EXPORT_SECRET}`) {
|
24 |
-
throw error(403);
|
25 |
-
}
|
26 |
-
|
27 |
const { model } = z
|
28 |
.object({
|
29 |
model: z.string(),
|
|
|
1 |
+
import { PARQUET_EXPORT_DATASET, PARQUET_EXPORT_HF_TOKEN } from "$env/static/private";
|
|
|
|
|
|
|
|
|
2 |
import { collections } from "$lib/server/database";
|
3 |
import type { Message } from "$lib/types/Message";
|
4 |
import { error } from "@sveltejs/kit";
|
|
|
9 |
import { z } from "zod";
|
10 |
|
11 |
// Trigger like this:
|
12 |
+
// curl -X POST "http://localhost:5173/chat/admin/export" -H "Authorization: Bearer <ADMIN_API_SECRET>" -H "Content-Type: application/json" -d '{"model": "OpenAssistant/oasst-sft-6-llama-30b-xor"}'
|
13 |
|
14 |
export async function POST({ request }) {
|
15 |
+
if (!PARQUET_EXPORT_DATASET || !PARQUET_EXPORT_HF_TOKEN) {
|
16 |
throw error(500, "Parquet export is not configured.");
|
17 |
}
|
18 |
|
|
|
|
|
|
|
|
|
19 |
const { model } = z
|
20 |
.object({
|
21 |
model: z.string(),
|
src/routes/admin/stats/compute/+server.ts
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { json } from "@sveltejs/kit";
|
2 |
+
import type { ConversationStats } from "$lib/types/ConversationStats";
|
3 |
+
import { CONVERSATION_STATS_COLLECTION, collections } from "$lib/server/database.js";
|
4 |
+
|
5 |
+
// Trigger like this:
|
6 |
+
// curl -X POST "http://localhost:5173/chat/admin/stats/compute" -H "Authorization: Bearer <ADMIN_API_SECRET>"
|
7 |
+
|
8 |
+
export async function POST() {
|
9 |
+
for (const span of ["day", "week", "month"] as const) {
|
10 |
+
computeStats({ dateField: "updatedAt", type: "conversation", span }).catch(console.error);
|
11 |
+
computeStats({ dateField: "createdAt", type: "conversation", span }).catch(console.error);
|
12 |
+
computeStats({ dateField: "createdAt", type: "message", span }).catch(console.error);
|
13 |
+
}
|
14 |
+
|
15 |
+
return json({}, { status: 202 });
|
16 |
+
}
|
17 |
+
|
18 |
+
async function computeStats(params: {
|
19 |
+
dateField: ConversationStats["date"]["field"];
|
20 |
+
span: ConversationStats["date"]["span"];
|
21 |
+
type: ConversationStats["type"];
|
22 |
+
}) {
|
23 |
+
const lastComputed = await collections.conversationStats.findOne(
|
24 |
+
{ "date.field": params.dateField, "date.span": params.span, type: params.type },
|
25 |
+
{ sort: { "date.at": -1 } }
|
26 |
+
);
|
27 |
+
|
28 |
+
// Stats are computed separately for each type/span/date field, so resume from the start of the last stored bucket (or from the epoch if none exists yet)
|
29 |
+
// That last bucket is recomputed and replaced by the $merge stage below, presumably because it may have been incomplete when it was last computed
|
30 |
+
const minDate = lastComputed ? lastComputed.date.at : new Date(0);
|
31 |
+
|
32 |
+
console.log("Computing stats for", params.type, params.span, params.dateField, "from", minDate);
|
33 |
+
|
34 |
+
const dateField = params.type === "message" ? "messages." + params.dateField : params.dateField;
|
35 |
+
|
36 |
+
const pipeline = [
|
37 |
+
{
|
38 |
+
$match: {
|
39 |
+
[dateField]: { $gte: minDate },
|
40 |
+
},
|
41 |
+
},
|
42 |
+
{
|
43 |
+
$project: {
|
44 |
+
[dateField]: 1,
|
45 |
+
sessionId: 1,
|
46 |
+
userId: 1,
|
47 |
+
},
|
48 |
+
},
|
49 |
+
...(params.type === "message"
|
50 |
+
? [
|
51 |
+
{
|
52 |
+
$unwind: "$messages",
|
53 |
+
},
|
54 |
+
{
|
55 |
+
$match: {
|
56 |
+
[dateField]: { $gte: minDate },
|
57 |
+
},
|
58 |
+
},
|
59 |
+
]
|
60 |
+
: []),
|
61 |
+
{
|
62 |
+
$sort: {
|
63 |
+
[dateField]: 1,
|
64 |
+
},
|
65 |
+
},
|
66 |
+
{
|
67 |
+
$facet: {
|
68 |
+
userId: [
|
69 |
+
{
|
70 |
+
$match: {
|
71 |
+
userId: { $exists: true },
|
72 |
+
},
|
73 |
+
},
|
74 |
+
{
|
75 |
+
$group: {
|
76 |
+
_id: {
|
77 |
+
at: { $dateTrunc: { date: `$${dateField}`, unit: params.span } },
|
78 |
+
userId: "$userId",
|
79 |
+
},
|
80 |
+
},
|
81 |
+
},
|
82 |
+
{
|
83 |
+
$group: {
|
84 |
+
_id: "$_id.at",
|
85 |
+
count: { $sum: 1 },
|
86 |
+
},
|
87 |
+
},
|
88 |
+
{
|
89 |
+
$project: {
|
90 |
+
_id: 0,
|
91 |
+
date: {
|
92 |
+
at: "$_id",
|
93 |
+
field: params.dateField,
|
94 |
+
span: params.span,
|
95 |
+
},
|
96 |
+
distinct: "userId",
|
97 |
+
count: 1,
|
98 |
+
},
|
99 |
+
},
|
100 |
+
],
|
101 |
+
sessionId: [
|
102 |
+
{
|
103 |
+
$match: {
|
104 |
+
sessionId: { $exists: true },
|
105 |
+
},
|
106 |
+
},
|
107 |
+
{
|
108 |
+
$group: {
|
109 |
+
_id: {
|
110 |
+
at: { $dateTrunc: { date: `$${dateField}`, unit: params.span } },
|
111 |
+
sessionId: "$sessionId",
|
112 |
+
},
|
113 |
+
},
|
114 |
+
},
|
115 |
+
{
|
116 |
+
$group: {
|
117 |
+
_id: "$_id.at",
|
118 |
+
count: { $sum: 1 },
|
119 |
+
},
|
120 |
+
},
|
121 |
+
{
|
122 |
+
$project: {
|
123 |
+
_id: 0,
|
124 |
+
date: {
|
125 |
+
at: "$_id",
|
126 |
+
field: params.dateField,
|
127 |
+
span: params.span,
|
128 |
+
},
|
129 |
+
distinct: "sessionId",
|
130 |
+
count: 1,
|
131 |
+
},
|
132 |
+
},
|
133 |
+
],
|
134 |
+
userOrSessionId: [
|
135 |
+
{
|
136 |
+
$group: {
|
137 |
+
_id: {
|
138 |
+
at: { $dateTrunc: { date: `$${dateField}`, unit: params.span } },
|
139 |
+
userOrSessionId: { $ifNull: ["$userId", "$sessionId"] },
|
140 |
+
},
|
141 |
+
},
|
142 |
+
},
|
143 |
+
{
|
144 |
+
$group: {
|
145 |
+
_id: "$_id.at",
|
146 |
+
count: { $sum: 1 },
|
147 |
+
},
|
148 |
+
},
|
149 |
+
{
|
150 |
+
$project: {
|
151 |
+
_id: 0,
|
152 |
+
date: {
|
153 |
+
at: "$_id",
|
154 |
+
field: params.dateField,
|
155 |
+
span: params.span,
|
156 |
+
},
|
157 |
+
distinct: "userOrSessionId",
|
158 |
+
count: 1,
|
159 |
+
},
|
160 |
+
},
|
161 |
+
],
|
162 |
+
_id: [
|
163 |
+
{
|
164 |
+
$group: {
|
165 |
+
_id: { $dateTrunc: { date: `$${dateField}`, unit: params.span } },
|
166 |
+
count: { $sum: 1 },
|
167 |
+
},
|
168 |
+
},
|
169 |
+
{
|
170 |
+
$project: {
|
171 |
+
_id: 0,
|
172 |
+
date: {
|
173 |
+
at: "$_id",
|
174 |
+
field: params.dateField,
|
175 |
+
span: params.span,
|
176 |
+
},
|
177 |
+
distinct: "_id",
|
178 |
+
count: 1,
|
179 |
+
},
|
180 |
+
},
|
181 |
+
],
|
182 |
+
},
|
183 |
+
},
|
184 |
+
{
|
185 |
+
$project: {
|
186 |
+
stats: {
|
187 |
+
$concatArrays: ["$userId", "$sessionId", "$userOrSessionId", "$_id"],
|
188 |
+
},
|
189 |
+
},
|
190 |
+
},
|
191 |
+
{
|
192 |
+
$unwind: "$stats",
|
193 |
+
},
|
194 |
+
{
|
195 |
+
$replaceRoot: {
|
196 |
+
newRoot: "$stats",
|
197 |
+
},
|
198 |
+
},
|
199 |
+
{
|
200 |
+
$set: {
|
201 |
+
type: params.type,
|
202 |
+
},
|
203 |
+
},
|
204 |
+
{
|
205 |
+
$merge: {
|
206 |
+
into: CONVERSATION_STATS_COLLECTION,
|
207 |
+
on: ["date.at", "type", "date.span", "date.field", "distinct"],
|
208 |
+
whenMatched: "replace",
|
209 |
+
whenNotMatched: "insert",
|
210 |
+
},
|
211 |
+
},
|
212 |
+
];
|
213 |
+
|
214 |
+
await collections.conversations.aggregate(pipeline, { allowDiskUse: true }).next();
|
215 |
+
|
216 |
+
console.log("Computed stats for", params.type, params.span, params.dateField);
|
217 |
+
}
|