Spaces:

Steveeeeeeen
/

Open_ASR_Leaderboard

Running

App Files Files Community

Open_ASR_Leaderboard / .venv /lib /python3.10 /site-packages /httpx /_utils.py

Steveeeeeeen HF staff

Upload folder using huggingface_hub

9f0d781 verified 3 months ago

raw

history blame

13.9 kB

	from __future__ import annotations

	import codecs
	import email.message
	import ipaddress
	import mimetypes
	import os
	import re
	import time
	import typing
	from pathlib import Path
	from urllib.request import getproxies

	import sniffio

	from ._types import PrimitiveData

	if typing.TYPE_CHECKING: # pragma: no cover
	from ._urls import URL


	_HTML5_FORM_ENCODING_REPLACEMENTS = {'"': "%22", "\\": "\\\\"}
	_HTML5_FORM_ENCODING_REPLACEMENTS.update(
	{chr(c): "%{:02X}".format(c) for c in range(0x1F + 1) if c != 0x1B}
	)
	_HTML5_FORM_ENCODING_RE = re.compile(
	r"\|".join([re.escape(c) for c in _HTML5_FORM_ENCODING_REPLACEMENTS.keys()])
	)


	def normalize_header_key(
	value: str \| bytes,
	lower: bool,
	encoding: str \| None = None,
	) -> bytes:
	"""
	Coerce str/bytes into a strictly byte-wise HTTP header key.
	"""
	if isinstance(value, bytes):
	bytes_value = value
	else:
	bytes_value = value.encode(encoding or "ascii")

	return bytes_value.lower() if lower else bytes_value


	def normalize_header_value(value: str \| bytes, encoding: str \| None = None) -> bytes:
	"""
	Coerce str/bytes into a strictly byte-wise HTTP header value.
	"""
	if isinstance(value, bytes):
	return value
	return value.encode(encoding or "ascii")


	def primitive_value_to_str(value: PrimitiveData) -> str:
	"""
	Coerce a primitive data type into a string value.

	Note that we prefer JSON-style 'true'/'false' for boolean values here.
	"""
	if value is True:
	return "true"
	elif value is False:
	return "false"
	elif value is None:
	return ""
	return str(value)


	def is_known_encoding(encoding: str) -> bool:
	"""
	Return `True` if `encoding` is a known codec.
	"""
	try:
	codecs.lookup(encoding)
	except LookupError:
	return False
	return True


	def format_form_param(name: str, value: str) -> bytes:
	"""
	Encode a name/value pair within a multipart form.
	"""

	def replacer(match: typing.Match[str]) -> str:
	return _HTML5_FORM_ENCODING_REPLACEMENTS[match.group(0)]

	value = _HTML5_FORM_ENCODING_RE.sub(replacer, value)
	return f'{name}="{value}"'.encode()


	def get_ca_bundle_from_env() -> str \| None:
	if "SSL_CERT_FILE" in os.environ:
	ssl_file = Path(os.environ["SSL_CERT_FILE"])
	if ssl_file.is_file():
	return str(ssl_file)
	if "SSL_CERT_DIR" in os.environ:
	ssl_path = Path(os.environ["SSL_CERT_DIR"])
	if ssl_path.is_dir():
	return str(ssl_path)
	return None


	def parse_header_links(value: str) -> list[dict[str, str]]:
	"""
	Returns a list of parsed link headers, for more info see:
	https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link
	The generic syntax of those is:
	Link: < uri-reference >; param1=value1; param2="value2"
	So for instance:
	Link; '<http:/.../front.jpeg>; type="image/jpeg",<http://.../back.jpeg>;'
	would return
	[
	{"url": "http:/.../front.jpeg", "type": "image/jpeg"},
	{"url": "http://.../back.jpeg"},
	]
	:param value: HTTP Link entity-header field
	:return: list of parsed link headers
	"""
	links: list[dict[str, str]] = []
	replace_chars = " '\""
	value = value.strip(replace_chars)
	if not value:
	return links
	for val in re.split(", *<", value):
	try:
	url, params = val.split(";", 1)
	except ValueError:
	url, params = val, ""
	link = {"url": url.strip("<> '\"")}
	for param in params.split(";"):
	try:
	key, value = param.split("=")
	except ValueError:
	break
	link[key.strip(replace_chars)] = value.strip(replace_chars)
	links.append(link)
	return links


	def parse_content_type_charset(content_type: str) -> str \| None:
	# We used to use `cgi.parse_header()` here, but `cgi` became a dead battery.
	# See: https://peps.python.org/pep-0594/#cgi
	msg = email.message.Message()
	msg["content-type"] = content_type
	return msg.get_content_charset(failobj=None)


	SENSITIVE_HEADERS = {"authorization", "proxy-authorization"}


	def obfuscate_sensitive_headers(
	items: typing.Iterable[tuple[typing.AnyStr, typing.AnyStr]],
	) -> typing.Iterator[tuple[typing.AnyStr, typing.AnyStr]]:
	for k, v in items:
	if to_str(k.lower()) in SENSITIVE_HEADERS:
	v = to_bytes_or_str("[secure]", match_type_of=v)
	yield k, v


	def port_or_default(url: URL) -> int \| None:
	if url.port is not None:
	return url.port
	return {"http": 80, "https": 443}.get(url.scheme)


	def same_origin(url: URL, other: URL) -> bool:
	"""
	Return 'True' if the given URLs share the same origin.
	"""
	return (
	url.scheme == other.scheme
	and url.host == other.host
	and port_or_default(url) == port_or_default(other)
	)


	def is_https_redirect(url: URL, location: URL) -> bool:
	"""
	Return 'True' if 'location' is a HTTPS upgrade of 'url'
	"""
	if url.host != location.host:
	return False

	return (
	url.scheme == "http"
	and port_or_default(url) == 80
	and location.scheme == "https"
	and port_or_default(location) == 443
	)


	def get_environment_proxies() -> dict[str, str \| None]:
	"""Gets proxy information from the environment"""

	# urllib.request.getproxies() falls back on System
	# Registry and Config for proxies on Windows and macOS.
	# We don't want to propagate non-HTTP proxies into
	# our configuration such as 'TRAVIS_APT_PROXY'.
	proxy_info = getproxies()
	mounts: dict[str, str \| None] = {}

	for scheme in ("http", "https", "all"):
	if proxy_info.get(scheme):
	hostname = proxy_info[scheme]
	mounts[f"{scheme}://"] = (
	hostname if "://" in hostname else f"http://{hostname}"
	)

	no_proxy_hosts = [host.strip() for host in proxy_info.get("no", "").split(",")]
	for hostname in no_proxy_hosts:
	# See https://curl.haxx.se/libcurl/c/CURLOPT_NOPROXY.html for details
	# on how names in `NO_PROXY` are handled.
	if hostname == "*":
	# If NO_PROXY=* is used or if "*" occurs as any one of the comma
	# separated hostnames, then we should just bypass any information
	# from HTTP_PROXY, HTTPS_PROXY, ALL_PROXY, and always ignore
	# proxies.
	return {}
	elif hostname:
	# NO_PROXY=.google.com is marked as "all://*.google.com,
	# which disables "www.google.com" but not "google.com"
	# NO_PROXY=google.com is marked as "all://*google.com,
	# which disables "www.google.com" and "google.com".
	# (But not "wwwgoogle.com")
	# NO_PROXY can include domains, IPv6, IPv4 addresses and "localhost"
	# NO_PROXY=example.com,::1,localhost,192.168.0.0/16
	if "://" in hostname:
	mounts[hostname] = None
	elif is_ipv4_hostname(hostname):
	mounts[f"all://{hostname}"] = None
	elif is_ipv6_hostname(hostname):
	mounts[f"all://[{hostname}]"] = None
	elif hostname.lower() == "localhost":
	mounts[f"all://{hostname}"] = None
	else:
	mounts[f"all://*{hostname}"] = None

	return mounts


	def to_bytes(value: str \| bytes, encoding: str = "utf-8") -> bytes:
	return value.encode(encoding) if isinstance(value, str) else value


	def to_str(value: str \| bytes, encoding: str = "utf-8") -> str:
	return value if isinstance(value, str) else value.decode(encoding)


	def to_bytes_or_str(value: str, match_type_of: typing.AnyStr) -> typing.AnyStr:
	return value if isinstance(match_type_of, str) else value.encode()


	def unquote(value: str) -> str:
	return value[1:-1] if value[0] == value[-1] == '"' else value


	def guess_content_type(filename: str \| None) -> str \| None:
	if filename:
	return mimetypes.guess_type(filename)[0] or "application/octet-stream"
	return None


	def peek_filelike_length(stream: typing.Any) -> int \| None:
	"""
	Given a file-like stream object, return its length in number of bytes
	without reading it into memory.
	"""
	try:
	# Is it an actual file?
	fd = stream.fileno()
	# Yup, seems to be an actual file.
	length = os.fstat(fd).st_size
	except (AttributeError, OSError):
	# No... Maybe it's something that supports random access, like `io.BytesIO`?
	try:
	# Assuming so, go to end of stream to figure out its length,
	# then put it back in place.
	offset = stream.tell()
	length = stream.seek(0, os.SEEK_END)
	stream.seek(offset)
	except (AttributeError, OSError):
	# Not even that? Sorry, we're doomed...
	return None

	return length


	class Timer:
	async def _get_time(self) -> float:
	library = sniffio.current_async_library()
	if library == "trio":
	import trio

	return trio.current_time()
	else:
	import asyncio

	return asyncio.get_event_loop().time()

	def sync_start(self) -> None:
	self.started = time.perf_counter()

	async def async_start(self) -> None:
	self.started = await self._get_time()

	def sync_elapsed(self) -> float:
	now = time.perf_counter()
	return now - self.started

	async def async_elapsed(self) -> float:
	now = await self._get_time()
	return now - self.started


	class URLPattern:
	"""
	A utility class currently used for making lookups against proxy keys...

	# Wildcard matching...
	>>> pattern = URLPattern("all://")
	>>> pattern.matches(httpx.URL("http://example.com"))
	True

	# Witch scheme matching...
	>>> pattern = URLPattern("https://")
	>>> pattern.matches(httpx.URL("https://example.com"))
	True
	>>> pattern.matches(httpx.URL("http://example.com"))
	False

	# With domain matching...
	>>> pattern = URLPattern("https://example.com")
	>>> pattern.matches(httpx.URL("https://example.com"))
	True
	>>> pattern.matches(httpx.URL("http://example.com"))
	False
	>>> pattern.matches(httpx.URL("https://other.com"))
	False

	# Wildcard scheme, with domain matching...
	>>> pattern = URLPattern("all://example.com")
	>>> pattern.matches(httpx.URL("https://example.com"))
	True
	>>> pattern.matches(httpx.URL("http://example.com"))
	True
	>>> pattern.matches(httpx.URL("https://other.com"))
	False

	# With port matching...
	>>> pattern = URLPattern("https://example.com:1234")
	>>> pattern.matches(httpx.URL("https://example.com:1234"))
	True
	>>> pattern.matches(httpx.URL("https://example.com"))
	False
	"""

	def __init__(self, pattern: str) -> None:
	from ._urls import URL

	if pattern and ":" not in pattern:
	raise ValueError(
	f"Proxy keys should use proper URL forms rather "
	f"than plain scheme strings. "
	f'Instead of "{pattern}", use "{pattern}://"'
	)

	url = URL(pattern)
	self.pattern = pattern
	self.scheme = "" if url.scheme == "all" else url.scheme
	self.host = "" if url.host == "*" else url.host
	self.port = url.port
	if not url.host or url.host == "*":
	self.host_regex: typing.Pattern[str] \| None = None
	elif url.host.startswith("*."):
	# *.example.com should match "www.example.com", but not "example.com"
	domain = re.escape(url.host[2:])
	self.host_regex = re.compile(f"^.+\\.{domain}$")
	elif url.host.startswith("*"):
	# *example.com should match "www.example.com" and "example.com"
	domain = re.escape(url.host[1:])
	self.host_regex = re.compile(f"^(.+\\.)?{domain}$")
	else:
	# example.com should match "example.com" but not "www.example.com"
	domain = re.escape(url.host)
	self.host_regex = re.compile(f"^{domain}$")

	def matches(self, other: URL) -> bool:
	if self.scheme and self.scheme != other.scheme:
	return False
	if (
	self.host
	and self.host_regex is not None
	and not self.host_regex.match(other.host)
	):
	return False
	if self.port is not None and self.port != other.port:
	return False
	return True

	@property
	def priority(self) -> tuple[int, int, int]:
	"""
	The priority allows URLPattern instances to be sortable, so that
	we can match from most specific to least specific.
	"""
	# URLs with a port should take priority over URLs without a port.
	port_priority = 0 if self.port is not None else 1
	# Longer hostnames should match first.
	host_priority = -len(self.host)
	# Longer schemes should match first.
	scheme_priority = -len(self.scheme)
	return (port_priority, host_priority, scheme_priority)

	def __hash__(self) -> int:
	return hash(self.pattern)

	def __lt__(self, other: URLPattern) -> bool:
	return self.priority < other.priority

	def __eq__(self, other: typing.Any) -> bool:
	return isinstance(other, URLPattern) and self.pattern == other.pattern


	def is_ipv4_hostname(hostname: str) -> bool:
	try:
	ipaddress.IPv4Address(hostname.split("/")[0])
	except Exception:
	return False
	return True


	def is_ipv6_hostname(hostname: str) -> bool:
	try:
	ipaddress.IPv6Address(hostname.split("/")[0])
	except Exception:
	return False
	return True