multimodalart HF Staff committed on
Commit
f478a41
·
verified ·
1 Parent(s): e666fa8

Create proxy.js

Browse files
Files changed (1) hide show
  1. proxy.js +94 -0
proxy.js ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const express = require('express');
2
+ const fetch = require('node-fetch');
3
+
4
+ const app = express();
5
// Upstream endpoint every request is forwarded to. It can be overridden with
// the TARGET_URL environment variable (same pattern as PORT); trailing slashes
// are removed because the request path, which starts with "/", is appended to it.
const TARGET_URL = (
  process.env.TARGET_URL ||
  'https://rhknk53jznw37un7.us-east-1.aws.endpoints.huggingface.cloud'
).replace(/\/+$/, '');

// Maximum number of requests forwarded to the upstream at the same time.
const MAX_PARALLEL = 4;

// Total time budget for retrying a single request against a 503-ing upstream.
const MAX_WAIT_MS = 10 * 60 * 1000; // 10 minutes

// Number of requests currently being handled.
let activeRequests = 0;

// FIFO list of { req, res, next } entries waiting for a free slot.
const queue = [];
11
+
12
/**
 * Starts queued requests until either the queue is empty or MAX_PARALLEL
 * requests are in flight. Each started request re-runs this function when it
 * settles, so the queue keeps draining as slots free up.
 *
 * Kept `async` so existing callers that treat the result as a promise keep
 * working; the function itself does not wait for the started requests.
 */
async function processQueue() {
  while (queue.length > 0 && activeRequests < MAX_PARALLEL) {
    const { req, res, next } = queue.shift();
    activeRequests++;
    handleRequest(req, res, next)
      .catch((error) => {
        // Without this handler a rejection would surface as an unhandled
        // promise rejection, which terminates the process on current Node.
        console.error('Unhandled error while proxying request:', error);
      })
      .finally(() => {
        // Release the slot and let the next queued request start.
        activeRequests--;
        processQueue();
      });
  }
}
22
+
23
/**
 * Calls `fetch(url, options)` and keeps retrying with exponential backoff for
 * as long as the upstream answers with HTTP 503.
 *
 * @param {string} url - Absolute URL to request.
 * @param {object} options - Options passed unchanged to every `fetch` call.
 * @param {number} startTime - Epoch milliseconds at which the time budget started.
 * @returns {Promise<object>} The first response whose status is not 503.
 * @throws {Error} When MAX_WAIT_MS has elapsed since `startTime`, or when the
 *   next backoff delay would go past it. Errors thrown by `fetch` propagate.
 */
async function retryWith503Backoff(url, options, startTime) {
  let attempt = 0;

  while (true) {
    if (Date.now() - startTime > MAX_WAIT_MS) {
      throw new Error('Max wait time exceeded (10 minutes)');
    }

    const response = await fetch(url, options);

    if (response.status !== 503) {
      return response;
    }

    // This response is being thrown away; destroy its body stream so the
    // underlying connection is not left open waiting to be read.
    if (response.body && typeof response.body.destroy === 'function') {
      response.body.destroy();
    }

    // Exponential backoff: 1s, 2s, 4s, 8s, 16s, 32s, then capped at 64s.
    const delay = Math.min(1000 * 2 ** attempt, 64000);
    attempt++;

    // Re-read the clock here: the fetch above may itself have taken a long
    // time, so the elapsed time measured before it would be stale.
    const elapsed = Date.now() - startTime;
    if (elapsed + delay > MAX_WAIT_MS) {
      throw new Error('Max wait time would be exceeded');
    }

    await new Promise((resolve) => setTimeout(resolve, delay));
  }
}
50
+
51
// Hop-by-hop headers apply to a single connection only, so a proxy must not
// copy them from one side to the other.
const HOP_BY_HOP_HEADERS = new Set([
  'connection',
  'keep-alive',
  'proxy-authenticate',
  'proxy-authorization',
  'te',
  'trailer',
  'transfer-encoding',
  'upgrade',
]);

/**
 * Forwards one incoming request to TARGET_URL (retrying on 503 through
 * retryWith503Backoff) and streams the upstream response back to the client.
 *
 * The returned promise settles only once the response has finished or the
 * client connection has closed, so callers that count in-flight requests keep
 * the slot occupied for the whole transfer.
 *
 * @param {object} req - Incoming request; `method`, `url`, `headers` and `body` are read.
 * @param {object} res - Outgoing response; also used as the pipe destination.
 * @param {Function} next - Unused; kept for the middleware-style signature.
 * @returns {Promise<void>}
 */
async function handleRequest(req, res, next) {
  try {
    const startTime = Date.now();
    const targetUrl = TARGET_URL + req.url;

    // Only forward a body that really is a non-empty Buffer. When the client
    // sent no body, req.body may be a non-Buffer placeholder (e.g. `{}`),
    // which must not be sent upstream.
    const mayHaveBody = req.method !== 'GET' && req.method !== 'HEAD';
    const hasBody = mayHaveBody && Buffer.isBuffer(req.body) && req.body.length > 0;

    const forwardedHeaders = Object.fromEntries(
      Object.entries({ ...req.headers, host: new URL(TARGET_URL).host }).filter(
        ([name]) => !HOP_BY_HOP_HEADERS.has(name.toLowerCase()),
      ),
    );

    const options = {
      method: req.method,
      headers: forwardedHeaders,
      body: hasBody ? req.body : undefined,
      // node-fetch option: do not decompress the upstream body, so the relayed
      // bytes still match the content-encoding / content-length headers.
      compress: false,
    };

    const response = await retryWith503Backoff(targetUrl, options, startTime);

    res.status(response.status);
    response.headers.forEach((value, key) => {
      if (!HOP_BY_HOP_HEADERS.has(key.toLowerCase())) {
        res.setHeader(key, value);
      }
    });

    await new Promise((resolve) => {
      const settle = () => {
        // Stop reading from the upstream once the client side is done.
        response.body.destroy();
        resolve();
      };

      // Headers are already on their way at this point, so an upstream
      // failure can only be signalled by dropping the client connection.
      response.body.once('error', (streamError) => {
        console.error('Upstream stream failed:', streamError.message);
        res.destroy();
      });

      // 'finish' covers a complete response, 'close' a client disconnect.
      res.once('finish', settle);
      res.once('close', settle);

      response.body.pipe(res);
    });
  } catch (error) {
    if (res.headersSent) {
      // Too late for a JSON error body; just terminate the response.
      res.destroy();
      return;
    }
    res.status(504).json({ error: error.message });
  }
}
74
+
75
// Read every request body, whatever its content type, into a Buffer of at
// most 50 MB; handleRequest forwards that Buffer as the upstream body.
app.use(express.raw({ type: '*/*', limit: '50mb' }));

// Admission control: every request is enqueued and processQueue() starts it
// as soon as fewer than MAX_PARALLEL requests are in flight. Going through the
// queue even when a slot is free keeps the slot accounting in one place.
app.use((req, res, next) => {
  const entry = { req, res, next };
  queue.push(entry);

  // If the client goes away while still waiting, drop the entry so it does
  // not occupy a slot later. Once the entry has been dequeued this is a no-op.
  res.once('close', () => {
    const position = queue.indexOf(entry);
    if (position !== -1) {
      queue.splice(position, 1);
    }
  });

  processQueue();
});
88
+
89
// Port to listen on: the PORT environment variable when set, otherwise 7860.
const PORT = process.env.PORT || 7860;

// Start the HTTP server and print the effective configuration once it is up.
app.listen(PORT, () => {
  const startupLines = [
    `Reverse proxy listening on port ${PORT}`,
    `Proxying to: ${TARGET_URL}`,
    `Max parallel requests: ${MAX_PARALLEL}`,
  ];
  for (const line of startupLines) {
    console.log(line);
  }
});