Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 5,568 Bytes
94753b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
import { omit } from "../src/utils/omit";
import { isBackend, isFrontend } from "../../shared";
import { HF_HUB_URL } from "../src/lib/getDefaultTask";
const TAPES_FILE = "./tapes.json";
const BASE64_PREFIX = "data:application/octet-stream;base64,";
enum MODE {
RECORD = "record",
PLAYBACK = "playback",
CACHE = "cache",
DISABLED = "disabled",
}
let VCR_MODE: MODE;
/**
* Allows to record tapes with a token to avoid rate limit.
*
* If VCR_MODE is not set and a token is present then disable it.
*/
const env = import.meta.env;
if (env.VCR_MODE) {
if ((env.VCR_MODE === MODE.RECORD || env.VCR_MODE === MODE.CACHE) && isFrontend) {
throw new Error("VCR_MODE=record is not supported in the browser");
}
VCR_MODE = env.VCR_MODE as MODE;
} else {
VCR_MODE = env.HF_TOKEN ? MODE.DISABLED : MODE.PLAYBACK;
}
const originalFetch = globalThis.fetch;
globalThis.fetch = (...args) => vcr(originalFetch, args[0], args[1]);
/**
* Represents a recorded HTTP request
*/
interface Tape {
url: string;
init?: RequestInit;
response: {
/**
* Base64 string of the response body
*/
body: string;
status: number;
statusText: string;
headers?: HeadersInit;
};
}
async function tapeToResponse(tape: Tape) {
return new Response(
tape.response.body?.startsWith(BASE64_PREFIX) ? (await originalFetch(tape.response.body)).body : tape.response.body,
{
status: tape.response.status,
statusText: tape.response.statusText,
headers: tape.response.headers,
}
);
}
/**
* Headers are volontarily skipped for now. They are not useful to distinguish requests
* but bring more complexity because some of them are not deterministics like "date"
* and it's complex to handle all the formats they can be given in.
*/
async function hashRequest(url: string, init: RequestInit): Promise<string> {
const hashObject = {
url,
method: init.method,
body: init.body,
};
const inputBuffer = new TextEncoder().encode(JSON.stringify(hashObject));
let hashed: ArrayBuffer;
if (isBackend) {
const crypto = await import("node:crypto");
hashed = await crypto.subtle.digest("SHA-256", inputBuffer);
} else {
hashed = await crypto.subtle.digest("SHA-256", inputBuffer);
}
return Array.from(new Uint8Array(hashed))
.map((b) => b.toString(16).padStart(2, "0"))
.join("");
}
/**
* This function behavior change according to the value of the VCR_MODE environment variable:
* - record: requests will be made to the external API and responses will be saved in files
* - playback: answers will be read from the filesystem, if they don't have been recorded before then an error will be thrown
* - cache: same as playback but if the response is not found in the filesystem then it will be recorded
*/
async function vcr(
originalFetch: typeof global.fetch,
input: RequestInfo | URL,
init: RequestInit = {}
): Promise<Response> {
let url: string;
if (typeof input === "string") {
url = input;
} else if (input instanceof URL) {
url = input.href;
} else {
url = input.url;
}
const hash = await hashRequest(url, init);
const { default: tapes } = await import(TAPES_FILE);
if (VCR_MODE === MODE.PLAYBACK && !url.startsWith(HF_HUB_URL)) {
if (!tapes[hash]) {
throw new Error(`Tape not found: ${hash} (${url})`);
}
const response = tapeToResponse(tapes[hash]);
return response;
}
if (VCR_MODE === "cache" && tapes[hash]) {
const response = tapeToResponse(tapes[hash]);
return response;
}
const response = await originalFetch(input, init);
if (url.startsWith(HF_HUB_URL)) {
return response;
}
if (VCR_MODE === MODE.RECORD || VCR_MODE === MODE.CACHE) {
const isText =
response.headers.get("Content-Type")?.includes("json") || response.headers.get("Content-Type")?.includes("text");
const isJson = response.headers.get("Content-Type")?.includes("json");
const arrayBuffer = await response.arrayBuffer();
let body = "";
if (isText || isJson) {
body = new TextDecoder().decode(arrayBuffer);
if (isJson) {
// check for base64 strings and truncate them
body = JSON.stringify(
JSON.parse(body, (key: unknown, value: unknown): unknown => {
if (
typeof value === "string" &&
value.length > 1_000 &&
// base64 heuristic
value.length % 4 === 0 &&
value.match(/^[a-zA-Z0-9+/]+={0,2}$/)
) {
return value.slice(0, 1_000);
} else {
return value;
}
})
);
}
} else {
// // Alternative to also save binary data:
// arrayBuffer.byteLength > 30_000
// ? ""
// : isText
// ? new TextDecoder().decode(arrayBuffer)
// : BASE64_PREFIX + base64FromBytes(new Uint8Array(arrayBuffer)),
body = "";
}
const tape: Tape = {
url,
init: {
headers: init.headers && omit(init.headers as Record<string, string>, "Authorization"),
method: init.method,
body: typeof init.body === "string" && init.body.length < 1_000 ? init.body : undefined,
},
response: {
body,
status: response.status,
statusText: response.statusText,
headers: Object.fromEntries(
// Remove varying headers as much as possible
[...response.headers.entries()].filter(
([key]) => key !== "date" && key !== "content-length" && !key.startsWith("x-") && key !== "via"
)
),
},
};
tapes[hash] = tape;
const { writeFileSync } = await import("node:fs");
writeFileSync(`./test/${TAPES_FILE}`, JSON.stringify(tapes, null, 2));
// Return a new response with an unconsummed body
return tapeToResponse(tape);
}
return response;
}
|