Spaces:

danilovabg
/

detection_project

Build error

App Files Files Community

detection_project / mmdetection /.dev_scripts /check_links.py

danilovabg

Upload 2759 files

c7e10ef over 2 years ago

raw

history blame contribute delete

5.06 kB

	# Modified from:
	# https://github.com/allenai/allennlp/blob/main/scripts/check_links.py

	import argparse
	import logging
	import os
	import pathlib
	import re
	import sys
	from multiprocessing.dummy import Pool
	from typing import NamedTuple, Optional, Tuple

	import requests
	from mmcv.utils import get_logger


	def parse_args():
	    """Parse the command-line options of the link checker.

	    Returns:
	        argparse.Namespace: Parsed options with the attributes
	            ``num_threads`` (int, default 100), ``https_proxy``
	            (str, ``None`` when not given) and ``out`` (str, default
	            ``'link_reports.txt'``).
	    """
	    cli = argparse.ArgumentParser(
	        description='Goes through all the inline-links '
	        'in markdown files and reports the breakages')
	    # One (flag, keyword-arguments) entry per supported option.
	    option_table = (
	        ('--num-threads',
	         dict(
	             type=int,
	             default=100,
	             help='Number of processes to confirm the link')),
	        ('--https-proxy', dict(type=str, help='https proxy')),
	        ('--out',
	         dict(
	             type=str,
	             default='link_reports.txt',
	             help='output path of reports')),
	    )
	    for flag, spec in option_table:
	        cli.add_argument(flag, **spec)
	    return cli.parse_args()


	# HTTP status codes that are treated as "the link target exists", in
	# addition to whatever the response object itself reports as ok.
	OK_STATUS_CODES = (
	    200,
	    # the resource exists but may require some sort of login
	    401,
	    # same situation as 401
	    403,
	    # the HEAD method is not allowed by the server
	    405,
	    # the resource exists, but our default 'Accept-' header may not
	    # match what the server can provide
	    406,
	)


	class MatchTuple(NamedTuple):
	    """One inline markdown link together with the file it came from."""

	    # path of the markdown file that contains the link
	    source: str
	    # text between the square brackets of the link
	    name: str
	    # target between the parentheses of the link (URL or file path)
	    link: str


	def check_link(
	    match_tuple: MatchTuple,
	    http_session: requests.Session,
	    logger: Optional[logging.Logger] = None
	) -> Tuple[MatchTuple, bool, Optional[str]]:
	    """Check a single markdown link and report the outcome.

	    Links with an ``http://`` or ``https://`` scheme are checked with
	    :func:`check_url`; every other link is treated as a file path and
	    checked with :func:`check_path`.

	    Args:
	        match_tuple (MatchTuple): The link to check.
	        http_session (requests.Session): Session used for URL checks.
	        logger (logging.Logger, optional): Logger that receives the
	            one-line result. When ``None`` the result is printed instead.

	    Returns:
	        tuple: ``(match_tuple, result_ok, reason)`` where ``reason`` is the
	            string returned by :func:`check_url`, or ``None`` for path
	            links.
	    """
	    reason: Optional[str] = None
	    # Require a full scheme: a bare ``startswith('http')`` would also send
	    # relative paths such as ``http_api.md`` to requests, which cannot
	    # fetch them.
	    if match_tuple.link.startswith(('http://', 'https://')):
	        result_ok, reason = check_url(match_tuple, http_session)
	    else:
	        result_ok = check_path(match_tuple)
	    message = f" {'✓' if result_ok else '✗'} {match_tuple.link}"
	    if logger is None:
	        print(message)
	    else:
	        logger.info(message)
	    return match_tuple, result_ok, reason


	def check_url(match_tuple: MatchTuple,
	              http_session: requests.Session) -> Tuple[bool, str]:
	    """Check if a URL is reachable.

	    A HEAD request is sent with a 5 second timeout and redirects are
	    followed. The URL counts as reachable when the response is ok or its
	    status code is listed in ``OK_STATUS_CODES``.

	    Args:
	        match_tuple (MatchTuple): The link whose ``link`` field is the URL.
	        http_session (requests.Session): Session used to send the request.

	    Returns:
	        tuple: ``(reachable, reason)`` where ``reason`` describes the status
	            code or the kind of failure.
	    """
	    try:
	        result = http_session.head(
	            match_tuple.link, timeout=5, allow_redirects=True)
	    except (requests.ConnectionError, requests.Timeout):
	        return False, 'connection error'
	    except requests.RequestException as err:
	        # Any other request failure (for instance too many redirects or a
	        # malformed URL) marks this one link as unreachable instead of
	        # aborting the whole run.
	        return False, f'request error: {type(err).__name__}'
	    return (
	        result.ok or result.status_code in OK_STATUS_CODES,
	        f'status code = {result.status_code}',
	    )


	def check_path(match_tuple: MatchTuple) -> bool:
	    """Check if a file in this repository exists.

	    The link is resolved relative to the directory of the markdown file it
	    was found in; anything from the first ``#`` onwards is ignored.

	    Args:
	        match_tuple (MatchTuple): The link to check.

	    Returns:
	        bool: Whether the resolved path exists.
	    """
	    # Only the part before the first '#' names the file.
	    target, _, _ = match_tuple.link.partition('#')
	    source_dir = os.path.dirname(str(match_tuple.source))
	    candidate = os.path.join(source_dir, target)
	    return os.path.exists(candidate)


	def main():
	    """Collect every inline link from the markdown files and verify it.

	    All ``*.md`` files below the parent directory of this script's
	    directory are scanned for ``[name](link)`` patterns (links containing
	    ``localhost`` are skipped). Each link is checked with
	    :func:`check_link` on a thread pool of ``--num-threads`` workers, and
	    the results are logged through the ``mmdet`` logger, which is given
	    ``--out`` as its log file.

	    Raises:
	        SystemExit: With exit status 1 when at least one link is
	            unreachable.
	    """
	    args = parse_args()

	    # setup logger
	    logger = get_logger(name='mmdet', log_file=args.out)

	    # setup https_proxy
	    if args.https_proxy:
	        os.environ['https_proxy'] = args.https_proxy

	    logger.info('Finding all markdown files in the current directory...')

	    project_root = (pathlib.Path(__file__).parent / '..').resolve()
	    # '**/*.md' walks the whole tree; the previous '*/.md' only matched
	    # files literally named '.md' one directory below the root.
	    markdown_files = project_root.glob('**/*.md')

	    all_matches = set()
	    url_regex = re.compile(r'\[([^!][^\]]+)\]\(([^)(]+)\)')
	    for markdown_file in markdown_files:
	        # Read as UTF-8 explicitly so the result does not depend on the
	        # locale of the machine running the check.
	        with open(markdown_file, encoding='utf-8') as handle:
	            for line in handle:
	                for name, link in url_regex.findall(line):
	                    if 'localhost' not in link:
	                        all_matches.add(
	                            MatchTuple(
	                                source=str(markdown_file),
	                                name=name,
	                                link=link))

	    # The set holds links, not files, so report it as such.
	    logger.info(f' {len(all_matches)} links found in markdown files')
	    logger.info('Checking to make sure we can retrieve each link...')

	    # The session is closed again once all links have been checked.
	    with requests.Session() as http_session:
	        for resource_prefix in ('http://', 'https://'):
	            http_session.mount(
	                resource_prefix,
	                requests.adapters.HTTPAdapter(
	                    max_retries=5,
	                    pool_connections=20,
	                    pool_maxsize=args.num_threads),
	            )

	        with Pool(processes=args.num_threads) as pool:
	            results = pool.starmap(
	                check_link,
	                [(match, http_session, logger) for match in all_matches])

	    # collect unreachable results
	    unreachable_results = [(match_tuple, reason)
	                           for match_tuple, success, reason in results
	                           if not success]

	    if unreachable_results:
	        logger.info('================================================')
	        logger.info(f'Unreachable links ({len(unreachable_results)}):')
	        for match_tuple, reason in unreachable_results:
	            logger.info(' > Source: ' + match_tuple.source)
	            logger.info(' Name: ' + match_tuple.name)
	            logger.info(' Link: ' + match_tuple.link)
	            if reason is not None:
	                logger.info(' Reason: ' + reason)
	        sys.exit(1)
	    logger.info('No Unreachable link found.')


	# Run the link check only when this file is executed as a script, not
	# when it is imported.
	if __name__ == '__main__':
	    main()