nsarrazin HF staff mishig HF staff Mishig committed on
Commit
4dbcbb6
·
unverified ·
1 Parent(s): 10dbbd6

Automated migration setup (#897)

Browse files

* Initial migration setup for chat-ui

* refresh the lock regularly while the migrations are running

* add index for migrationResults

* clean up code a bit

* Don't try to run migrations when sveltekit is building

* simplified lock code

* Add early check for migrations being done
Reduce timer for lock

* migration use `generateSearchTokens`

* Update src/lib/migrations/migrations.spec.ts

Co-authored-by: Mishig <mishig.davaadorj@coloradocollege.edu>

---------

Co-authored-by: Mishig Davaadorj <dmishig@gmail.com>
Co-authored-by: Mishig <mishig.davaadorj@coloradocollege.edu>

src/hooks.server.ts CHANGED
@@ -17,6 +17,12 @@ import { findUser, refreshSessionCookie, requiresUser } from "$lib/server/auth";
17
  import { ERROR_MESSAGES } from "$lib/stores/errors";
18
  import { sha256 } from "$lib/utils/sha256";
19
  import { addWeeks } from "date-fns";
 
 
 
 
 
 
20
 
21
  export const handle: Handle = async ({ event, resolve }) => {
22
  if (event.url.pathname.startsWith(`${base}/api/`) && EXPOSE_API !== "true") {
 
17
  import { ERROR_MESSAGES } from "$lib/stores/errors";
18
  import { sha256 } from "$lib/utils/sha256";
19
  import { addWeeks } from "date-fns";
20
+ import { checkAndRunMigrations } from "$lib/migrations/migrations";
21
+ import { building } from "$app/environment";
22
+
23
+ if (!building) {
24
+ await checkAndRunMigrations();
25
+ }
26
 
27
  export const handle: Handle = async ({ event, resolve }) => {
28
  if (event.url.pathname.startsWith(`${base}/api/`) && EXPOSE_API !== "true") {
src/lib/migrations/lock.ts ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { collections } from "$lib/server/database";
2
+
3
/**
 * Try to take the named lock by inserting a semaphore document.
 *
 * Mutual exclusion relies on the insert failing with a duplicate-key error
 * when a document with the same `key` already exists (the `semaphores`
 * collection is given a unique index on `key` in database.ts).
 *
 * @param key - Name of the lock; defaults to "migrations".
 * @returns `true` if this call inserted the lock document, `false` if a
 *   lock with the same key is already held.
 * @throws Any database error other than a duplicate-key violation, so that
 *   connectivity or permission problems are not mistaken for "lock is held".
 */
export async function acquireLock(key = "migrations"): Promise<boolean> {
  const now = new Date();

  try {
    const insert = await collections.semaphores.insertOne({
      key,
      createdAt: now,
      updatedAt: now,
    });

    return !!insert.acknowledged; // true if the document was inserted
  } catch (e: unknown) {
    // unique index violation, so there must already be a lock
    if (isDuplicateKeyError(e)) {
      return false;
    }
    // anything else is a real failure and must not look like a held lock
    throw e;
  }
}

/** Whether `e` carries MongoDB's duplicate-key error code (E11000). */
function isDuplicateKeyError(e: unknown): boolean {
  return (
    typeof e === "object" && e !== null && "code" in e && (e as { code?: unknown }).code === 11000
  );
}
17
+
18
/**
 * Release the named lock by deleting its semaphore document.
 *
 * @param key - Name of the lock; defaults to "migrations".
 */
export async function releaseLock(key = "migrations") {
  const lockFilter = { key };
  await collections.semaphores.deleteOne(lockFilter);
}
23
+
24
/**
 * Report whether a semaphore document exists for the given lock name.
 *
 * @param key - Name of the lock; defaults to "migrations".
 * @returns `true` when at least one matching semaphore document is found.
 */
export async function isDBLocked(key = "migrations"): Promise<boolean> {
  return (await collections.semaphores.countDocuments({ key })) > 0;
}
30
+
31
/**
 * Stamp the named lock's `updatedAt` field with the current time.
 *
 * NOTE(review): the TTL index created in database.ts is on `createdAt`,
 * which this function does not touch - confirm whether refreshing is meant
 * to postpone the lock's expiry.
 *
 * @param key - Name of the lock; defaults to "migrations".
 */
export async function refreshLock(key = "migrations") {
  const touch = { $set: { updatedAt: new Date() } };
  await collections.semaphores.updateOne({ key }, touch);
}
src/lib/migrations/migrations.spec.ts ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { afterEach, describe, expect, it } from "vitest";
2
+ import { migrations } from "./routines";
3
+ import { acquireLock, isDBLocked, refreshLock, releaseLock } from "./lock";
4
+ import { collections } from "$lib/server/database";
5
+
6
// Tests for the migration registry and the database-backed lock helpers.
describe("migrations", () => {
  it("should not have duplicates guid", async () => {
    const guids = migrations.map((m) => m._id.toString());
    const uniqueGuids = [...new Set(guids)];
    expect(uniqueGuids.length).toBe(guids.length);
  });

  it("should acquire only one lock on DB", async () => {
    // fire many concurrent attempts; exactly one may win
    const results = await Promise.all(new Array(1000).fill(0).map(() => acquireLock()));
    const locks = results.filter((r) => r);

    const semaphores = await collections.semaphores.find({}).toArray();

    expect(locks.length).toBe(1);
    expect(semaphores).toBeDefined();
    expect(semaphores.length).toBe(1);
    expect(semaphores?.[0].key).toBe("migrations");
  });

  it("should read the lock correctly", async () => {
    expect(await acquireLock()).toBe(true);
    expect(await isDBLocked()).toBe(true);
    expect(await acquireLock()).toBe(false);
    await releaseLock();
    expect(await isDBLocked()).toBe(false);
  });

  it("should refresh the lock", async () => {
    await acquireLock();

    // get the updatedAt time
    const updatedAtInitially = (await collections.semaphores.findOne({}))?.updatedAt;

    // let the clock advance so a refreshed timestamp is strictly later
    await new Promise((resolve) => setTimeout(resolve, 10));

    await refreshLock();

    const updatedAtAfterRefresh = (await collections.semaphores.findOne({}))?.updatedAt;

    expect(updatedAtInitially).toBeDefined();
    expect(updatedAtAfterRefresh).toBeDefined();
    if (!updatedAtInitially || !updatedAtAfterRefresh) {
      throw new Error("lock document not found");
    }

    // Compare timestamps, not object identity: two Date instances read from
    // separate queries are never the same object, so `not.toBe` on the Dates
    // themselves would pass even if the refresh did nothing.
    expect(updatedAtAfterRefresh.getTime()).toBeGreaterThan(updatedAtInitially.getTime());
  });
});
49
+
50
// Empty the lock and migration-result collections after every test so each
// test starts from a clean state.
afterEach(async () => {
  const { semaphores, migrationResults } = collections;
  await semaphores.deleteMany({});
  await migrationResults.deleteMany({});
});
src/lib/migrations/migrations.ts ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { client, collections } from "$lib/server/database";
2
+ import { migrations } from "./routines";
3
+ import { acquireLock, releaseLock, isDBLocked, refreshLock } from "./lock";
4
+ import { isHuggingChat } from "$lib/utils/isHuggingChat";
5
+
6
/**
 * Apply every registered migration that has no recorded result yet.
 *
 * Flow:
 *  1. Reject duplicate migration ids.
 *  2. Return immediately if every migration already has an entry in
 *     `migrationResults`.
 *  3. Try to take the "migrations" lock. If another instance holds it, poll
 *     once per second until it is released, then return without running
 *     anything.
 *  4. While holding the lock, run each pending migration and record its
 *     status ("ongoing", then "success" or "failure").
 *
 * The lock is refreshed every 10 seconds while migrations run, and is always
 * released (and the refresh timer cleared) even if a step throws.
 *
 * @throws Error if two migrations share the same `_id`; database errors from
 *   the bookkeeping queries also propagate.
 */
export async function checkAndRunMigrations() {
  // make sure all GUIDs are unique
  if (new Set(migrations.map((m) => m._id.toString())).size !== migrations.length) {
    throw new Error("Duplicate migration GUIDs found.");
  }

  // check if all migrations have already been run
  const initialResults = await collections.migrationResults.find().toArray();
  const initiallyRecorded = new Set(initialResults.map((r) => r._id.toString()));

  // if all the migrations._id are in the migrationResults, we can exit early
  if (migrations.every((m) => initiallyRecorded.has(m._id.toString()))) {
    console.log("[MIGRATIONS] All migrations already applied.");
    return;
  }

  console.log("[MIGRATIONS] Begin check...");

  // connect to the database
  const connectedClient = await client.connect();

  const hasLock = await acquireLock();

  if (!hasLock) {
    // another instance already has the lock, so we wait for it to finish
    // and then return without running anything ourselves
    console.log(
      "[MIGRATIONS] Another instance already has the lock. Waiting for DB to be unlocked."
    );

    // block until the lock is released
    while (await isDBLocked()) {
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
    return;
  }

  // once here, we have the lock
  // make sure to refresh it regularly while it's running; a failed refresh is
  // logged rather than left as an unhandled promise rejection
  const refreshInterval = setInterval(() => {
    refreshLock().catch(console.error);
  }, 1000 * 10);

  try {
    // Re-read the results now that we hold the lock: another instance may
    // have finished and released between our first read and our acquire.
    const recordedResults = await collections.migrationResults.find().toArray();
    const recordedIds = new Set(recordedResults.map((r) => r._id.toString()));

    // iterate over all migrations
    for (const migration of migrations) {
      // check if the migration has already been applied
      if (recordedIds.has(migration._id.toString())) {
        console.log(`[MIGRATIONS] "${migration.name}" already applied. Skipping...`);
        continue;
      }

      // check the modifiers to see if some cases match
      if (
        (migration.runForHuggingChat === "only" && !isHuggingChat) ||
        (migration.runForHuggingChat === "never" && isHuggingChat)
      ) {
        console.log(
          `[MIGRATIONS] "${migration.name}" should not be applied for this run. Skipping...`
        );
        continue;
      }

      // otherwise all is good and we can run the migration
      console.log(`[MIGRATIONS] "${migration.name}" not applied yet. Applying...`);

      await collections.migrationResults.updateOne(
        { _id: migration._id },
        {
          $set: {
            name: migration.name,
            status: "ongoing",
          },
        },
        { upsert: true }
      );

      const session = connectedClient.startSession();
      let result = false;

      try {
        // NOTE(review): `up` receives only the client, not `session` -
        // confirm whether its writes are actually part of this transaction.
        await session.withTransaction(async () => {
          result = await migration.up(connectedClient);
        });
      } catch (e) {
        // a throwing migration is recorded as a failure; later ones still run
        console.log(`[MIGRATIONS] "${migration.name}" failed!`);
        console.error(e);
      } finally {
        await session.endSession();
      }

      await collections.migrationResults.updateOne(
        { _id: migration._id },
        {
          $set: {
            name: migration.name,
            status: result ? "success" : "failure",
          },
        },
        { upsert: true }
      );
    }

    console.log("[MIGRATIONS] All migrations applied. Releasing lock");
  } finally {
    // always stop refreshing and give the lock back, even on error, so other
    // instances are not left waiting on a lock nobody is using
    clearInterval(refreshInterval);
    await releaseLock();
  }
}
src/lib/migrations/routines/01-update-search-assistants.ts ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Migration } from ".";
2
+ import { getCollections } from "$lib/server/database";
3
+ import { ObjectId, type AnyBulkWriteOperation } from "mongodb";
4
+ import type { Assistant } from "$lib/types/Assistant";
5
+ import { generateSearchTokens } from "$lib/utils/searchTokens";
6
+
7
/**
 * Migration that (re)computes `searchTokens` for every assistant from its
 * name, writing the updates in unordered bulk batches of 1000.
 * `down` removes the `searchTokens` field from all assistants.
 */
const migration: Migration = {
  _id: new ObjectId("5f9f3e3e3e3e3e3e3e3e3e3e"),
  name: "Update search assistants",
  up: async (client) => {
    const { assistants } = getCollections(client);

    // only _id and name are needed to build the update
    const cursor = assistants
      .find()
      .project<Pick<Assistant, "_id" | "name">>({ _id: 1, name: 1 });

    let pending: AnyBulkWriteOperation<Assistant>[] = [];

    for await (const { _id, name } of cursor) {
      pending.push({
        updateOne: {
          filter: { _id },
          update: { $set: { searchTokens: generateSearchTokens(name) } },
        },
      });

      if (pending.length < 1000) {
        continue;
      }

      // batch is full: print a progress dot and flush it
      process.stdout.write(".");
      await assistants.bulkWrite(pending, { ordered: false });
      pending = [];
    }

    // flush whatever is left over from the last partial batch
    if (pending.length > 0) {
      await assistants.bulkWrite(pending, { ordered: false });
    }

    return true;
  },
  down: async (client) => {
    const { assistants } = getCollections(client);
    await assistants.updateMany({}, { $unset: { searchTokens: "" } });
    return true;
  },
};
49
+
50
+ export default migration;
src/lib/migrations/routines/index.ts ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MongoClient, ObjectId } from "mongodb";
2
+
3
+ import updateSearchAssistant from "./01-update-search-assistants";
4
+
5
/** Description of a single database migration routine. */
export interface Migration {
  /** Identifier under which the migration's result is recorded; must be unique across all migrations. */
  _id: ObjectId;

  /** Human-readable name, used in log output and stored with the result. */
  name: string;

  /**
   * Applies the migration. The runner records "success" when this resolves
   * to `true` and "failure" when it resolves to `false` or throws.
   */
  up: (client: MongoClient) => Promise<boolean>;

  /** Optional routine that reverts the migration. */
  down?: (client: MongoClient) => Promise<boolean>;

  /** Leave unspecified to run for both. */
  runForFreshInstall?: "only" | "never";

  /**
   * "only" runs the migration only on HuggingChat, "never" skips it there.
   * Leave unspecified to run for both.
   */
  runForHuggingChat?: "only" | "never";
}
13
+
14
+ export const migrations: Migration[] = [updateSearchAssistant];
src/lib/server/database.ts CHANGED
@@ -10,12 +10,15 @@ import type { Session } from "$lib/types/Session";
10
  import type { Assistant } from "$lib/types/Assistant";
11
  import type { Report } from "$lib/types/Report";
12
  import type { ConversationStats } from "$lib/types/ConversationStats";
 
 
13
 
14
  if (!MONGODB_URL) {
15
  throw new Error(
16
  "Please specify the MONGODB_URL environment variable inside .env.local. Set it to mongodb://localhost:27017 if you are running MongoDB locally, or to a MongoDB Atlas free instance for example."
17
  );
18
  }
 
19
 
20
  const client = new MongoClient(MONGODB_URL, {
21
  directConnection: MONGODB_DIRECT_CONNECTION === "true",
@@ -23,24 +26,44 @@ const client = new MongoClient(MONGODB_URL, {
23
 
24
  export const connectPromise = client.connect().catch(console.error);
25
 
26
- const db = client.db(MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : ""));
 
27
 
28
- export const CONVERSATION_STATS_COLLECTION = "conversations.stats";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- const conversations = db.collection<Conversation>("conversations");
31
- const conversationStats = db.collection<ConversationStats>(CONVERSATION_STATS_COLLECTION);
32
- const assistants = db.collection<Assistant>("assistants");
33
- const reports = db.collection<Report>("reports");
34
- const sharedConversations = db.collection<SharedConversation>("sharedConversations");
35
- const abortedGenerations = db.collection<AbortedGeneration>("abortedGenerations");
36
- const settings = db.collection<Settings>("settings");
37
- const users = db.collection<User>("users");
38
- const sessions = db.collection<Session>("sessions");
39
- const messageEvents = db.collection<MessageEvent>("messageEvents");
40
- const bucket = new GridFSBucket(db, { bucketName: "files" });
41
 
42
- export { client, db };
43
- export const collections = {
44
  conversations,
45
  conversationStats,
46
  assistants,
@@ -51,8 +74,10 @@ export const collections = {
51
  users,
52
  sessions,
53
  messageEvents,
54
- bucket,
55
- };
 
 
56
 
57
  client.on("open", () => {
58
  conversations
@@ -120,4 +145,8 @@ client.on("open", () => {
120
  assistants.createIndex({ searchTokens: 1 }).catch(console.error);
121
  reports.createIndex({ assistantId: 1 }).catch(console.error);
122
  reports.createIndex({ createdBy: 1, assistantId: 1 }).catch(console.error);
 
 
 
 
123
  });
 
10
  import type { Assistant } from "$lib/types/Assistant";
11
  import type { Report } from "$lib/types/Report";
12
  import type { ConversationStats } from "$lib/types/ConversationStats";
13
+ import type { MigrationResult } from "$lib/types/MigrationResult";
14
+ import type { Semaphore } from "$lib/types/Semaphore";
15
 
16
  if (!MONGODB_URL) {
17
  throw new Error(
18
  "Please specify the MONGODB_URL environment variable inside .env.local. Set it to mongodb://localhost:27017 if you are running MongoDB locally, or to a MongoDB Atlas free instance for example."
19
  );
20
  }
21
+ export const CONVERSATION_STATS_COLLECTION = "conversations.stats";
22
 
23
  const client = new MongoClient(MONGODB_URL, {
24
  directConnection: MONGODB_DIRECT_CONNECTION === "true",
 
26
 
27
  export const connectPromise = client.connect().catch(console.error);
28
 
29
/**
 * Build the typed collection handles (and the GridFS "files" bucket) for the
 * application's database on the given client.
 *
 * The database name is `MONGODB_DB_NAME`, suffixed with "-test" when running
 * in test mode.
 *
 * @param mongoClient - Client whose database the handles are bound to.
 * @returns An object with one entry per collection, plus `bucket`.
 */
export function getCollections(mongoClient: MongoClient) {
  const dbName = MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : "");
  const db = mongoClient.db(dbName);

  return {
    conversations: db.collection<Conversation>("conversations"),
    conversationStats: db.collection<ConversationStats>(CONVERSATION_STATS_COLLECTION),
    assistants: db.collection<Assistant>("assistants"),
    reports: db.collection<Report>("reports"),
    sharedConversations: db.collection<SharedConversation>("sharedConversations"),
    abortedGenerations: db.collection<AbortedGeneration>("abortedGenerations"),
    settings: db.collection<Settings>("settings"),
    users: db.collection<User>("users"),
    sessions: db.collection<Session>("sessions"),
    messageEvents: db.collection<MessageEvent>("messageEvents"),
    bucket: new GridFSBucket(db, { bucketName: "files" }),
    migrationResults: db.collection<MigrationResult>("migrationResults"),
    semaphores: db.collection<Semaphore>("semaphores"),
  };
}
62
+ const db = client.db(MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : ""));
63
 
64
+ const collections = getCollections(client);
 
 
 
 
 
 
 
 
 
 
65
 
66
+ const {
 
67
  conversations,
68
  conversationStats,
69
  assistants,
 
74
  users,
75
  sessions,
76
  messageEvents,
77
+ semaphores,
78
+ } = collections;
79
+
80
+ export { client, db, collections };
81
 
82
  client.on("open", () => {
83
  conversations
 
145
  assistants.createIndex({ searchTokens: 1 }).catch(console.error);
146
  reports.createIndex({ assistantId: 1 }).catch(console.error);
147
  reports.createIndex({ createdBy: 1, assistantId: 1 }).catch(console.error);
148
+
149
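+ // Unique index on the semaphore key, plus a TTL index that expires semaphores 60 seconds after createdAt
+ // NOTE(review): refreshLock only updates updatedAt, which this TTL index does not look at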
150
+ semaphores.createIndex({ key: 1 }, { unique: true }).catch(console.error);
151
+ semaphores.createIndex({ createdAt: 1 }, { expireAfterSeconds: 60 }).catch(console.error);
152
  });
src/lib/types/MigrationResult.ts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import type { ObjectId } from "mongodb";
2
+
3
/** Record of one migration's outcome, stored in the `migrationResults` collection. */
export interface MigrationResult {
  /** Same id as the migration this result belongs to. */
  _id: ObjectId;

  /** Name of the migration at the time it was run. */
  name: string;

  /** "ongoing" while the migration is running, then "success" or "failure". */
  status: "success" | "failure" | "ongoing";
}
src/lib/types/Semaphore.ts ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import type { Timestamps } from "./Timestamps";
2
+
3
/**
 * Lock document stored in the `semaphores` collection; one document per
 * held lock, in addition to the fields inherited from `Timestamps`.
 */
export interface Semaphore extends Timestamps {
  /** Name of the lock (for example "migrations"). */
  key: string;
}