File size: 7,904 Bytes
8ea6360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env perl
use Mojo::Base -strict, -signatures;

# see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control for a good reference on Cache-Control headers in general

use Mojo::UserAgent;

# optional configuration for Docker Hub credentials to help with rate limiting
# WARNING: this will allow unauthenticated access for anyone who can hit the proxy to anything this user has access to, so please use it with care!!
# https://docs.docker.com/docker-hub/download-rate-limit/
# https://github.com/docker/hub-feedback/issues/1907
# the whole value is trimmed and then split on newlines, so each line is one credential (an unset or blank variable gives an empty list)
my $credsEnv = $ENV{DOCKERHUB_PUBLIC_PROXY_CREDENTIALS} // '';
my @creds = split /\n/, Mojo::Util::trim($credsEnv);

# shared user agent for every upstream request; redirects are never followed here (max_redirects is 0)
# max_response_size justification: https://github.com/opencontainers/distribution-spec/pull/293#issuecomment-1452780554 (allowing slightly more than registries should, just to be safe)
my $ua = Mojo::UserAgent->new;
$ua->max_redirects(0);
$ua->connect_timeout(20);
$ua->inactivity_timeout(20);
$ua->max_response_size(5 * 1024 * 1024);
# the User-Agent string can be overridden via the environment; the default is the project URL
$ua->transactor->name($ENV{DOCKERHUB_PUBLIC_PROXY_USER_AGENT} // 'https://github.com/tianon/dockerhub-public-proxy');

# Hand out the configured credentials one at a time, cycling back to the first after the last.
# Returns undef when no credentials are configured.
sub _cred {
	state $next = 0; # index of the credential to hand out on this call (kept between calls)
	return undef if !@creds;
	my $picked = $creds[$next++];
	$next %= @creds; # wrap around once we run off the end of the list
	return $picked;
}

# Perform a registry request, fetching a bearer token and retrying when the registry answers "401 Unauthorized".
#
#   $tries      - how many requests (including this one) may still be made
#   $method     - HTTP method name; lowercased and suffixed with "_p" to pick the user agent method to call
#   $repo       - repository name; also the key of the per-repo token cache
#   $url        - URL to request
#   %extHeaders - extra request headers
#
# Returns a promise resolving to the final transaction (which may still be a 401 once the tries are used up).
# The promise is rejected when the 401 challenge is not a usable Bearer challenge or the token cannot be fetched.
sub _registry_req_p {
	my ($tries, $method, $repo, $url, %extHeaders) = @_;

	--$tries;
	my $lastTry = $tries < 1;

	my %headers = %extHeaders;

	state %tokens; # per-repo bearer tokens, kept between calls
	if (my $token = $tokens{$repo}) {
		$headers{Authorization} = "Bearer $token";
	}

	my $methodP = lc($method) . '_p';
	return $ua->$methodP($url, \%headers)->then(sub {
		my $tx = shift;

		# anything other than a 401 (or a 401 on our final try) is handed back as-is
		return $tx if $lastTry || $tx->res->code != 401;

		# "Unauthorized" -- we must need to go fetch a token for this registry request (so let's go do that, then retry the original registry request)
		my $auth = $tx->res->headers->www_authenticate // ''; # a missing header becomes '' so the error below stays readable
		die "unexpected WWW-Authenticate header: $auth" unless $auth =~ m{ ^ Bearer \s+ (\S.*) $ }x;
		my $challenge = $1;

		my $authUrl = Mojo::URL->new;
		my $sawRealm = 0;
		while ($challenge =~ m{
			# key="val", with optional whitespace around the key, the "=" and the separating comma
			\s* ([^=\s,]+) \s*
			=
			\s* "([^"]+)"
			\s* ,?
		}xg) {
			my ($key, $val) = ($1, $2);
			next if $key eq 'error';
			if ($key eq 'realm') {
				# the realm is the token endpoint itself; every other parameter becomes a query parameter on it
				$authUrl->base(Mojo::URL->new($val));
				$sawRealm = 1;
			} else {
				$authUrl->query->append($key => $val);
			}
		}
		# without a realm there is no token endpoint to talk to, so fail with a clear message instead of requesting a relative URL
		die "WWW-Authenticate header is missing a realm: $auth" unless $sawRealm;
		$authUrl = $authUrl->to_abs;

		if (my $cred = _cred()) {
			# see description of DOCKERHUB_PUBLIC_PROXY_CREDENTIALS above
			$authUrl->userinfo($cred);
		}

		# to_unsafe_string keeps the userinfo in the URL so the credentials are actually sent
		return $ua->get_p($authUrl->to_unsafe_string)->then(sub {
			my $tokenTx = shift;
			if (my $error = $tokenTx->error) {
				die "failed to fetch token for $repo: " . ($error->{code} ? $error->{code} . ' -- ' : '') . $error->{message};
			}
			my $json = $tokenTx->res->json;
			die "failed to fetch token for $repo: response is not a JSON object" unless ref $json eq 'HASH';
			# token servers may return the token as "token" or (for OAuth 2.0 compatibility) as "access_token"
			$tokens{$repo} = $json->{token} // $json->{access_token};
			return _registry_req_p($tries, $method, $repo, $url, %extHeaders);
		});
	});
}
# Request "$path" inside "$repo" on Docker Hub's registry.
# Arguments: HTTP method, repository name, path below the repository, then extra request headers as key/value pairs.
# Returns whatever _registry_req_p returns for the resulting URL.
sub registry_req_p {
	my ($method, $repo, $path, %extHeaders) = @_;

	# exactly two tries: at most one auth round-trip plus one retry of the original request
	my $maxTries = 2;
	my $registryUrl = "https://registry-1.docker.io/v2/$repo/$path";

	return _registry_req_p($maxTries, $method, $repo, $registryUrl, %extHeaders);
}

use Mojolicious::Lite;

# Proxy GET/HEAD requests below /v2/<org>/<repo>/ to Docker Hub:
#  - URLs containing a sha256 digest are passed through and marked cacheable
#  - tag-based manifest requests are sent upstream as HEAD and answered with a redirect to the digest-based manifest URL
any [ 'GET', 'HEAD' ] => '/v2/#org/#repo/*url' => sub ($c) {
	$c->render_later;

	my $org = $c->param('org');
	my $name = $c->param('repo');
	my $repo = "$org/$name";
	my $url = $c->param('url');

	# the client's request headers are deliberately not forwarded (X-Forwarded-* and friends can lead to wrong "Location:" redirects); we only send our own Accept list
	my %headers = (
		# a generous Accept: list so plain "curl" gets something useful instead of a v1 manifest
		# clients that don't accept manifest lists get them anyway (such clients are not supported)
		Accept => [
			'application/vnd.docker.distribution.manifest.list.v2+json',
			'application/vnd.docker.distribution.manifest.v2+json',
			'application/vnd.oci.image.index.v1+json',
			'application/vnd.oci.image.manifest.v1+json',
			'application/vnd.oci.image.config.v1+json',
			'*/*',
		],
	);

	# anything addressed by digest is cacheable; a manifest request that is not by digest is a tag lookup
	my $cacheable = $url =~ m{/sha256:};
	my $tagRequest = !$cacheable && $url =~ m{^manifests/};

	# tag lookups only need the digest header, so they always go upstream as HEAD
	my $upstreamMethod = $tagRequest ? 'HEAD' : $c->req->method;

	my $respond = sub {
		my ($tx) = @_;
		my $upstream = $tx->res;
		my $status = $upstream->code;
		my $out = $c->res->headers;

		# start from a clean slate and mirror every upstream response header
		$out->from_hash({})->from_hash($upstream->headers->to_hash(1));

		# TODO check for $digest sooner and set $cacheable based on whether the request URL contains the digest returned?

		# content behind a digest cannot change by definition, so successful / permanent answers get a long lifetime
		my $maxAge = 0;
		if ($cacheable) {
			if (grep { $status == $_ } 200, 301, 308) {
				# 200 = success, 301 = Moved Permanently, 308 = Permanent Redirect
				# one year -- https://stackoverflow.com/a/25201898/433558
				$maxAge = 365 * 24 * 60 * 60;
			}
			elsif ($status >= 300 && $status < 400) {
				# the remaining redirects are probably temporary, so only half an hour
				$maxAge = 30 * 60;
			}
		}
		# non-digests (and anything that ended up with no lifetime) must not be cached
		$out->cache_control($maxAge ? 'public, max-age=' . $maxAge : 'no-cache');

		my $digest = $upstream->headers->header('docker-content-digest');

		# a successful response that names a non-empty digest but carries no content length is treated as broken
		my $emptyDigest = 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'; # this is the sha256 of the empty string (zero bytes)
		if ($status == 200 && !$out->content_length && $digest && $digest ne $emptyDigest) {
			$out->remove('Content-Length');
			return $c->render(status => 500, data => "response (unexpectedly) missing content-length (digest $digest)");
		}

		# it doesn't make any sense to redirect HEAD requests -- they're not very cacheable anyhow, so all that does is double the number of requests-per-request
		# HOWEVER, the spec says that HEAD and GET *must* be interchangeable, so there are likely still glitches when they aren't even when we're asking for no-cache
		if ($tagRequest) {
			# when the client did not send HEAD we converted the request, so the upstream Content-Length describes a body we do not have;
			# a redirect carries no body either, so the length is zeroed in both situations
			my $converted = $c->req->method ne 'HEAD';
			$out->content_length(0) if $converted || $digest;

			if ($digest) {
				return $c->redirect_to("/v2/$repo/manifests/$digest");
			}
			if ($status == 200) {
				$out->remove('Content-Length');
				return $c->render(status => 500, data => 'we converted the request to a HEAD, but we need to generate a redirect and did not get a Docker-Content-Digest header to tell us where to redirect to');
			}
			# otherwise fall through and pass along whatever upstream said (a 404, for example)
		}

		$c->render(data => $upstream->body, status => $status);
	};

	# any failure along the way is reported through the regular exception reply
	my $fail = sub {
		$c->reply->exception(@_);
	};

	return registry_req_p($upstreamMethod, $repo, $url, %headers)->then($respond, $fail);
};
#any '/v2/*url' => { url => '' } => sub {
#	my $c = shift;
#	return $c->redirect_to('https://registry-1.docker.io/v2/' . $c->param('url'));
#};
get '/v2/' => sub ($c) {
	# clients commonly hit this as a "ping" (https://github.com/opencontainers/distribution-spec/blob/v1.1.1/spec.md#determining-support)
	# the answer is allowed to be cached for 24h, and a stale copy may be reused for another 24h while revalidating or on error;
	# longer would be possible, but this balances "too long if it ever has to change" against "revalidating all the time"
	my $day = 24 * 60 * 60;
	my @directives = (
		'public',
		"max-age=$day",
		"stale-while-revalidate=$day",
		"stale-if-error=$day",
	);
	$c->res->headers->cache_control(join ', ', @directives);
	$c->render(json => {});
};
get '/' => sub ($c) {
	# there is nothing to serve at the root, so send visitors to the project page
	my $projectUrl = 'https://github.com/tianon/dockerhub-public-proxy';
	return $c->redirect_to($projectUrl);
};

# start the Mojolicious::Lite application defined by the routes above
app->start;