File size: 4,622 Bytes
21dd449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import { getHFHubCachePath, getRepoFolderName } from "./cache-management";
import { dirname, join } from "node:path";
import { rename, lstat, mkdir, stat } from "node:fs/promises";
import type { CommitInfo, PathInfo } from "./paths-info";
import { pathsInfo } from "./paths-info";
import type { CredentialsParams, RepoDesignation } from "../types/public";
import { toRepoId } from "../utils/toRepoId";
import { downloadFile } from "./download-file";
import { createSymlink } from "../utils/symlink";
import { Readable } from "node:stream";
import type { ReadableStream } from "node:stream/web";
import { pipeline } from "node:stream/promises";
import { createWriteStream } from "node:fs";

/** Matches a string made of exactly 40 lowercase hexadecimal characters (the shape of a full commit hash). */
export const REGEX_COMMIT_HASH: RegExp = /^[0-9a-f]{40}$/;

/**
 * Build the location of a file inside the `snapshots` directory of a storage folder.
 *
 * @param storageFolder folder that contains the `snapshots` directory
 * @param revision name of the sub-folder under `snapshots`
 * @param relativeFilename file path relative to that revision folder
 * @returns `<storageFolder>/snapshots/<revision>/<relativeFilename>`, joined with the platform separator
 */
function getFilePointer(storageFolder: string, revision: string, relativeFilename: string): string {
	return join(join(storageFolder, "snapshots"), revision, relativeFilename);
}

/**
 * Report whether something is present at `path`.
 *
 * @param path location to probe
 * @param followSymlinks when truthy the probe uses `stat`, so a symlink is resolved and a
 * dangling link reports `false`; otherwise `lstat` is used and the link itself is probed
 * @returns `true` when the probe succeeds, `false` when it rejects for any reason
 */
async function exists(path: string, followSymlinks?: boolean): Promise<boolean> {
	const probe = followSymlinks ? stat(path) : lstat(path);
	// Any rejection (missing entry, permission problem, ...) is reported as "does not exist".
	return probe.then(
		() => true,
		() => false
	);
}

/**
 * Download a given file if it's not already present in the local cache.
 *
 * File contents are stored under `<storage folder>/blobs/<etag>` and exposed through a symlink at
 * `<storage folder>/snapshots/<commit>/<path>`. The download is written to a `.incomplete` file
 * first and renamed once fully written.
 *
 * @param params repository, file path and optional revision / hub URL / cache directory /
 * credentials / custom fetch
 * @returns the path of the symlink (inside the snapshot folder) pointing to the blob object
 * @throws Error when path information cannot be obtained for `params.path`, or when the download
 * returns no blob
 */
export async function downloadFileToCacheDir(
	params: {
		repo: RepoDesignation;
		path: string;
		/**
		 * If true, will download the raw git file.
		 *
		 * For example, when calling on a file stored with Git LFS, the pointer file will be downloaded instead.
		 */
		raw?: boolean;
		/**
		 * An optional Git revision id which can be a branch name, a tag, or a commit hash.
		 *
		 * @default "main"
		 */
		revision?: string;
		hubUrl?: string;
		cacheDir?: string;
		/**
		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
		 */
		fetch?: typeof fetch;
	} & Partial<CredentialsParams>
): Promise<string> {
	// get revision provided or default to main
	const revision = params.revision ?? "main";
	const cacheDir = params.cacheDir ?? getHFHubCachePath();
	// get repo id
	const repoId = toRepoId(params.repo);
	// get storage folder
	const storageFolder = join(cacheDir, getRepoFolderName(repoId));

	let commitHash: string | undefined;

	// if user provides a commitHash as revision, and they already have the file on disk, shortcut everything.
	if (REGEX_COMMIT_HASH.test(revision)) {
		commitHash = revision;
		const pointerPath = getFilePointer(storageFolder, revision, params.path);
		if (await exists(pointerPath, true)) return pointerPath;
	}

	const pathsInformation: (PathInfo & { lastCommit: CommitInfo })[] = await pathsInfo({
		...params,
		paths: [params.path],
		revision: revision,
		expand: true,
	});
	if (!pathsInformation || pathsInformation.length !== 1) throw new Error(`cannot get path info for ${params.path}`);
	const fileInfo = pathsInformation[0];

	// The blob is named after the LFS oid when the file is an LFS pointer, otherwise after the repo file oid.
	// NOTE(review): `params.raw` is not taken into account here, so a raw download of an LFS file is
	// still stored under the LFS oid — confirm this is intended.
	const etag: string = fileInfo.lfs ? fileInfo.lfs.oid : fileInfo.oid;

	// Commit used both for the snapshot folder and for the download itself, so that the
	// downloaded content always matches the oid the blob is named after.
	const resolvedCommit: string = commitHash ?? fileInfo.lastCommit.id;

	const pointerPath = getFilePointer(storageFolder, resolvedCommit, params.path);
	const blobPath = join(storageFolder, "blobs", etag);

	// if we have the pointer file, we can shortcut the download
	if (await exists(pointerPath, true)) return pointerPath;

	// mkdir blob and pointer path parent directory
	await mkdir(dirname(blobPath), { recursive: true });
	await mkdir(dirname(pointerPath), { recursive: true });

	// We might already have the blob but not the pointer
	// shortcut the download if needed
	if (await exists(blobPath)) {
		// create symlinks in snapshot folder to blob object
		await createSymlink({ sourcePath: blobPath, finalPath: pointerPath });
		return pointerPath;
	}

	const incomplete = `${blobPath}.incomplete`;
	console.debug(`Downloading ${params.path} to ${incomplete}`);

	// Pass the resolved commit explicitly: spreading `params` and then setting `revision: undefined`
	// (the case for branch/tag revisions) would discard the revision requested by the caller.
	const blob: Blob | null = await downloadFile({
		...params,
		revision: resolvedCommit,
	});

	if (!blob) {
		throw new Error(`invalid response for file ${params.path}`);
	}

	await pipeline(Readable.fromWeb(blob.stream() as ReadableStream), createWriteStream(incomplete));

	// rename the .incomplete file to the expected blob path
	await rename(incomplete, blobPath);
	// create symlinks in snapshot folder to blob object
	await createSymlink({ sourcePath: blobPath, finalPath: pointerPath });
	return pointerPath;
}