Spaces:

Nymbo
/

Webscout

Build error

App Files Files Community

Webscout / webscout /cli.py

Abhaykoul

Upload 85 files

9e7090f verified over 1 year ago

raw

history blame contribute delete

18.7 kB

	import csv
	import logging
	import os
	from concurrent.futures import ThreadPoolExecutor, as_completed
	from datetime import datetime
	from urllib.parse import unquote
	from pathlib import Path
	import click
	from curl_cffi import requests

	from .webscout_search import WEBS
	from .utils import json_dumps, json_loads
	from .version import __version__

	logger = logging.getLogger(__name__)

	# Terminal colour names keyed by their position (0-15); _print_data passes
	# these as the ``fg=`` argument of click.secho, one colour per field.
	COLORS = dict(
	    enumerate(
	        (
	            "black",
	            "red",
	            "green",
	            "yellow",
	            "blue",
	            "magenta",
	            "cyan",
	            "bright_black",
	            "bright_red",
	            "bright_green",
	            "bright_yellow",
	            "bright_blue",
	            "bright_magenta",
	            "bright_cyan",
	            "white",
	            "bright_white",
	        )
	    )
	)


	def _save_json(jsonfile, data):
	    """Serialise ``data`` with ``json_dumps`` and write it to ``jsonfile`` as UTF-8.

	    Any existing file at ``jsonfile`` is overwritten.
	    """
	    with open(jsonfile, mode="w", encoding="utf-8") as out:
	        serialized = json_dumps(data)
	        out.write(serialized)


	def _save_csv(csvfile, data):
	    """Write a sequence of result dicts to ``csvfile`` as UTF-8 CSV.

	    The header row is the union of the keys of every row, in first-seen
	    order, so a row carrying a key that the first row lacks no longer makes
	    ``csv.DictWriter`` raise ``ValueError``. Keys missing from a row are
	    written as empty cells. For rows that all share the same keys the output
	    is unchanged.

	    Args:
	        csvfile: path of the file to create (overwritten if it exists).
	        data: sequence of dicts; when empty/falsy an empty file is created.
	    """
	    with open(csvfile, "w", newline="", encoding="utf-8") as file:
	        if data:
	            # dict.fromkeys de-duplicates while preserving insertion order.
	            headers = list(dict.fromkeys(key for row in data for key in row))
	            writer = csv.DictWriter(file, fieldnames=headers, quoting=csv.QUOTE_MINIMAL)
	            writer.writeheader()
	            writer.writerows(data)


	def _print_data(data):
	    """Print search results to the terminal, one result at a time.

	    Each result gets a numbered separator line followed by one coloured
	    ``key value`` line per field. After every result the function blocks on
	    ``input()`` until the user presses Enter.

	    Args:
	        data: sequence of result dicts; nothing is printed when it is
	            empty/falsy.
	    """
	    if not data:
	        return
	    for i, e in enumerate(data, start=1):
	        click.secho(f"{i}.\t {'=' * 78}", bg="black", fg="white")
	        for j, (k, v) in enumerate(e.items(), start=1):
	            if v:
	                # URL-like / long fields get a wide wrap so they are not broken up.
	                width = 300 if k in ("content", "href", "image", "source", "thumbnail", "url") else 78
	                k = "language" if k == "detected_language" else k
	                text = click.wrap_text(
	                    f"{v}", width=width, initial_indent="", subsequent_indent=" " * 12, preserve_paragraphs=True
	                )
	            else:
	                text = v
	            # Wrap around the palette: indexing COLORS[j] directly raised
	            # KeyError for results with more fields than there are colours.
	            click.secho(f"{k:<12}{text}", bg="black", fg=COLORS[j % len(COLORS)], overline=True)
	        input()


	def _sanitize_keywords(keywords):
	    """Return ``keywords`` reduced to a form usable inside a file or folder name.

	    Search operators (``filetype``, ``site``, ``:``) are dropped, double
	    quotes become single quotes, and spaces and path separators become
	    underscores.
	    """
	    # Applied strictly in this order; later rules see the output of earlier ones.
	    substitutions = (
	        ("filetype", ""),
	        (":", ""),
	        ('"', "'"),
	        ("site", ""),
	        (" ", "_"),
	        ("/", "_"),
	        ("\\", "_"),
	        (" ", ""),
	    )
	    cleaned = keywords
	    for old, new in substitutions:
	        cleaned = cleaned.replace(old, new)
	    return cleaned


	def _download_file(url, dir_path, filename, proxy):
	    """Fetch ``url`` and store the response body in ``dir_path``.

	    The file name is ``filename`` truncated to 200 characters. This is a
	    best-effort helper: any exception (request failure, HTTP error status,
	    file-system error) is logged at DEBUG level and swallowed.
	    """
	    try:
	        response = requests.get(url, proxies=proxy, impersonate="chrome", timeout=10)
	        response.raise_for_status()
	        target = os.path.join(dir_path, filename[:200])
	        with open(target, "wb") as out:
	            out.write(response.content)
	    except Exception as err:
	        logger.debug(f"download_file url={url} {type(err).__name__} {err}")


	def _download_results(keywords, results, images=False, proxy=None, threads=None):
	    """Download every result into a freshly named folder using a thread pool.

	    The folder is created in the current directory and is named
	    ``<images|text>_<keywords>_<YYYYmmdd_HHMMSS>``. A progress bar is
	    advanced as each download finishes.

	    Args:
	        keywords: already-sanitised keywords, used in the folder name.
	        results: iterable of result dicts; the URL is read from ``"image"``
	            when ``images`` is true, otherwise from ``"href"``.
	        images: select image URLs instead of page URLs.
	        proxy: proxy URL applied to both http and https, or None.
	        threads: worker count; None means 10.
	    """
	    path_type = "images" if images else "text"
	    path = f"{path_type}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"
	    os.makedirs(path, exist_ok=True)
	    proxy = {"http": proxy, "https": proxy}

	    threads = 10 if threads is None else threads
	    with ThreadPoolExecutor(max_workers=threads) as executor:
	        futures = []
	        for i, res in enumerate(results, start=1):
	            url = res["image"] if images else res["href"]
	            filename = unquote(url.split("/")[-1].split("?")[0])
	            # unquote() can reintroduce path separators (%2F, %5C) from an
	            # untrusted URL; neutralise them so the name always stays a
	            # single component inside ``path``.
	            filename = filename.replace("/", "_").replace("\\", "_")
	            f = executor.submit(_download_file, url, path, f"{i}_{filename}", proxy)
	            futures.append(f)

	        with click.progressbar(
	            length=len(futures), label="Downloading", show_percent=True, show_pos=True, width=50
	        ) as bar:
	            for future in as_completed(futures):
	                future.result()
	                bar.update(1)


	# Root command group; chain=True lets several subcommands be given in one
	# invocation. The docstring doubles as the --help text shown to users.
	@click.group(chain=True)
	def cli():
	    """duckduckgo_search CLI tool"""
	    pass


	def safe_entry_point():
	    """Run the CLI and report any ``Exception`` as a single ``Type: message`` line."""
	    try:
	        cli()
	    except Exception as err:
	        message = f"{type(err).__name__}: {err}"
	        click.echo(message)


	@cli.command()
	def version():
	    # Prints the package version and also returns it as the command result.
	    current = __version__
	    print(current)
	    return current
	@cli.command()
	@click.option("-s", "--save", is_flag=True, default=False, help="save the conversation in the json file")
	@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def chat(save, proxy):
	    """CLI function to perform an interactive AI chat using DuckDuckGo API."""
	    cache_file = "WEBS_chat_conversation.json"
	    models = ["gpt-3.5", "claude-3-haiku"]
	    # NOTE(review): the other commands construct WEBS(proxies=...); confirm
	    # that WEBS accepts the ``proxy`` keyword as well.
	    client = WEBS(proxy=proxy)

	    print("DuckDuckGo AI chat. Available models:")
	    for idx, model in enumerate(models, start=1):
	        print(f"{idx}. {model}")
	    choice = input("Choose a model by entering its number[1]: ").strip()
	    # Empty input selects the first model. Anything else must be a number in
	    # 1..len(models): previously "0" silently picked the last model (index
	    # -1) and other bad input surfaced as a raw ValueError/IndexError.
	    try:
	        chosen_model_idx = int(choice) - 1 if choice else 0
	    except ValueError:
	        chosen_model_idx = -1
	    if not 0 <= chosen_model_idx < len(models):
	        raise click.BadParameter(f"model number must be between 1 and {len(models)}, got {choice!r}")
	    model = models[chosen_model_idx]
	    print(f"Using model: {model}")

	    # With --save, resume from the cached conversation if one exists.
	    if save and Path(cache_file).exists():
	        # _save_json writes UTF-8, so read it back with the same encoding
	        # instead of the platform default.
	        with open(cache_file, encoding="utf-8") as f:
	            cache = json_loads(f.read())
	            client._chat_vqd = cache.get("vqd", None)
	            client._chat_messages = cache.get("messages", [])

	    while True:
	        user_input = input(f"{'-'*78}\nYou: ")
	        # A blank line ends the session without sending anything.
	        if not user_input.strip():
	            break

	        resp_answer = client.chat(keywords=user_input, model=model)
	        text = click.wrap_text(resp_answer, width=78, preserve_paragraphs=True)
	        click.secho(f"AI: {text}", bg="black", fg="green", overline=True)

	        # The conversation state is written after every turn, whether or not
	        # --save was given (the flag only gates loading above).
	        cache = {"vqd": client._chat_vqd, "messages": client._chat_messages}
	        _save_json(cache_file, cache)

	        # Checked after the message has already been sent and answered.
	        if "exit" in user_input.lower() or "quit" in user_input.lower():
	            break

	@cli.command()
	@click.option("-k", "--keywords", required=True, help="text search, keywords for query")
	@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
	@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
	@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year")
	@click.option("-m", "--max_results", default=20, help="maximum number of results, default=20")
	@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
	@click.option("-d", "--download", is_flag=True, default=False, help="download results to 'keywords' folder")
	@click.option("-b", "--backend", default="api", type=click.Choice(["api", "html", "lite"]), help="which backend to use")
	@click.option("-th", "--threads", default=10, help="download threads, default=10")
	@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def text(keywords, region, safesearch, timelimit, backend, output, download, threads, max_results, proxy):
	    """CLI function to perform a text search using DuckDuckGo API."""
	    search_args = {
	        "keywords": keywords,
	        "region": region,
	        "safesearch": safesearch,
	        "timelimit": timelimit,
	        "backend": backend,
	        "max_results": max_results,
	    }
	    results = WEBS(proxies=proxy).text(**search_args)

	    # From here on only the file-system-safe form of the keywords is used.
	    safe_keywords = _sanitize_keywords(keywords)
	    base_name = f"text_{safe_keywords}_{datetime.now():%Y%m%d_%H%M%S}"

	    # The output value doubles as the file extension for the file writers.
	    writers = {"csv": _save_csv, "json": _save_json}
	    if output in writers:
	        writers[output](f"{base_name}.{output}", results)
	    elif output == "print" and not download:
	        # Printing is skipped when the results are being downloaded instead.
	        _print_data(results)

	    if download:
	        _download_results(safe_keywords, results, proxy=proxy, threads=threads)


	@cli.command()
	@click.option("-k", "--keywords", required=True, help="answers search, keywords for query")
	@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
	@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def answers(keywords, output, proxy):
	    """CLI function to perform a answers search using DuckDuckGo API."""
	    client = WEBS(proxies=proxy)
	    results = client.answers(keywords=keywords)

	    safe_keywords = _sanitize_keywords(keywords)
	    base_name = f"answers_{safe_keywords}_{datetime.now():%Y%m%d_%H%M%S}"

	    # The output value doubles as the file extension for the file writers.
	    writers = {"csv": _save_csv, "json": _save_json}
	    if output == "print":
	        _print_data(results)
	    elif output in writers:
	        writers[output](f"{base_name}.{output}", results)


	@cli.command()
	@click.option("-k", "--keywords", required=True, help="keywords for query")
	@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
	@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
	@click.option("-t", "--timelimit", default=None, type=click.Choice(["Day", "Week", "Month", "Year"]))
	@click.option("-size", "--size", default=None, type=click.Choice(["Small", "Medium", "Large", "Wallpaper"]))
	@click.option(
	    "-c",
	    "--color",
	    default=None,
	    type=click.Choice(
	        [
	            "color",
	            "Monochrome",
	            "Red",
	            "Orange",
	            "Yellow",
	            "Green",
	            "Blue",
	            "Purple",
	            "Pink",
	            "Brown",
	            "Black",
	            "Gray",
	            "Teal",
	            "White",
	        ]
	    ),
	)
	@click.option(
	    "-type", "--type_image", default=None, type=click.Choice(["photo", "clipart", "gif", "transparent", "line"])
	)
	@click.option("-l", "--layout", default=None, type=click.Choice(["Square", "Tall", "Wide"]))
	@click.option(
	    "-lic",
	    "--license_image",
	    default=None,
	    type=click.Choice(["any", "Public", "Share", "Modify", "ModifyCommercially"]),
	)
	@click.option("-m", "--max_results", default=90, help="maximum number of results, default=90")
	@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
	@click.option("-d", "--download", is_flag=True, default=False, help="download and save images to 'keywords' folder")
	@click.option("-th", "--threads", default=10, help="download threads, default=10")
	@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def images(
	    keywords,
	    region,
	    safesearch,
	    timelimit,
	    size,
	    color,
	    type_image,
	    layout,
	    license_image,
	    download,
	    threads,
	    max_results,
	    output,
	    proxy,
	):
	    """CLI function to perform a images search using DuckDuckGo API."""
	    data = WEBS(proxies=proxy).images(
	        keywords=keywords,
	        region=region,
	        safesearch=safesearch,
	        timelimit=timelimit,
	        size=size,
	        color=color,
	        type_image=type_image,
	        layout=layout,
	        license_image=license_image,
	        max_results=max_results,
	    )
	    # Sanitise exactly once and reuse the result for both the file name and
	    # the download folder. The sanitiser is not idempotent (removing one
	    # "site"/"filetype" can expose another), so applying it a second time for
	    # the file name could make the two names disagree.
	    keywords = _sanitize_keywords(keywords)
	    filename = f"images_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"
	    if output == "print" and not download:
	        # Printing is skipped when the images are being downloaded instead.
	        _print_data(data)
	    elif output == "csv":
	        _save_csv(f"{filename}.csv", data)
	    elif output == "json":
	        _save_json(f"{filename}.json", data)
	    if download:
	        _download_results(keywords, data, images=True, proxy=proxy, threads=threads)


	@cli.command()
	@click.option("-k", "--keywords", required=True, help="keywords for query")
	@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
	@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
	@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m"]), help="day, week, month")
	@click.option("-res", "--resolution", default=None, type=click.Choice(["high", "standart"]))
	@click.option("-d", "--duration", default=None, type=click.Choice(["short", "medium", "long"]))
	@click.option("-lic", "--license_videos", default=None, type=click.Choice(["creativeCommon", "youtube"]))
	@click.option("-m", "--max_results", default=50, help="maximum number of results, default=50")
	@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
	@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def videos(keywords, region, safesearch, timelimit, resolution, duration, license_videos, max_results, output, proxy):
	    """CLI function to perform a videos search using DuckDuckGo API."""
	    search_args = {
	        "keywords": keywords,
	        "region": region,
	        "safesearch": safesearch,
	        "timelimit": timelimit,
	        "resolution": resolution,
	        "duration": duration,
	        "license_videos": license_videos,
	        "max_results": max_results,
	    }
	    results = WEBS(proxies=proxy).videos(**search_args)

	    safe_keywords = _sanitize_keywords(keywords)
	    base_name = f"videos_{safe_keywords}_{datetime.now():%Y%m%d_%H%M%S}"

	    # The output value doubles as the file extension for the file writers.
	    writers = {"csv": _save_csv, "json": _save_json}
	    if output == "print":
	        _print_data(results)
	    elif output in writers:
	        writers[output](f"{base_name}.{output}", results)


	@cli.command()
	@click.option("-k", "--keywords", required=True, help="keywords for query")
	@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
	@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
	@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year")
	@click.option("-m", "--max_results", default=25, help="maximum number of results, default=25")
	@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
	@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def news(keywords, region, safesearch, timelimit, max_results, output, proxy):
	    """CLI function to perform a news search using DuckDuckGo API."""
	    client = WEBS(proxies=proxy)
	    results = client.news(
	        keywords=keywords,
	        region=region,
	        safesearch=safesearch,
	        timelimit=timelimit,
	        max_results=max_results,
	    )

	    safe_keywords = _sanitize_keywords(keywords)
	    base_name = f"news_{safe_keywords}_{datetime.now():%Y%m%d_%H%M%S}"

	    # The output value doubles as the file extension for the file writers.
	    writers = {"csv": _save_csv, "json": _save_json}
	    if output == "print":
	        _print_data(results)
	    elif output in writers:
	        writers[output](f"{base_name}.{output}", results)


	@cli.command()
	@click.option("-k", "--keywords", required=True, help="keywords for query")
	@click.option("-p", "--place", default=None, help="simplified search - if set, the other parameters are not used")
	@click.option("-s", "--street", default=None, help="house number/street")
	@click.option("-c", "--city", default=None, help="city of search")
	@click.option("-county", "--county", default=None, help="county of search")
	@click.option("-state", "--state", default=None, help="state of search")
	@click.option("-country", "--country", default=None, help="country of search")
	@click.option("-post", "--postalcode", default=None, help="postalcode of search")
	@click.option("-lat", "--latitude", default=None, help="""if lat and long are set, the other params are not used""")
	@click.option("-lon", "--longitude", default=None, help="""if lat and long are set, the other params are not used""")
	@click.option("-r", "--radius", default=0, help="expand the search square by the distance in kilometers")
	@click.option("-m", "--max_results", default=50, help="number of results, default=50")
	@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
	@click.option("-proxy", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def maps(
	    keywords,
	    place,
	    street,
	    city,
	    county,
	    state,
	    country,
	    postalcode,
	    latitude,
	    longitude,
	    radius,
	    max_results,
	    output,
	    proxy,
	):
	    """CLI function to perform a maps search using DuckDuckGo API."""
	    search_args = {
	        "keywords": keywords,
	        "place": place,
	        "street": street,
	        "city": city,
	        "county": county,
	        "state": state,
	        "country": country,
	        "postalcode": postalcode,
	        "latitude": latitude,
	        "longitude": longitude,
	        "radius": radius,
	        "max_results": max_results,
	    }
	    results = WEBS(proxies=proxy).maps(**search_args)

	    safe_keywords = _sanitize_keywords(keywords)
	    base_name = f"maps_{safe_keywords}_{datetime.now():%Y%m%d_%H%M%S}"

	    # The output value doubles as the file extension for the file writers.
	    writers = {"csv": _save_csv, "json": _save_json}
	    if output == "print":
	        _print_data(results)
	    elif output in writers:
	        writers[output](f"{base_name}.{output}", results)


	@cli.command()
	@click.option("-k", "--keywords", required=True, help="text for translation")
	@click.option("-f", "--from_", help="What language to translate from (defaults automatically)")
	@click.option("-t", "--to", default="en", help="de, ru, fr, etc. What language to translate, defaults='en'")
	@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
	@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def translate(keywords, from_, to, output, proxy):
	    """CLI function to perform translate using DuckDuckGo API."""
	    client = WEBS(proxies=proxy)
	    results = client.translate(keywords=keywords, from_=from_, to=to)

	    safe_keywords = _sanitize_keywords(keywords)
	    base_name = f"translate_{safe_keywords}_{datetime.now():%Y%m%d_%H%M%S}"

	    # The output value doubles as the file extension for the file writers.
	    writers = {"csv": _save_csv, "json": _save_json}
	    if output == "print":
	        _print_data(results)
	    elif output in writers:
	        writers[output](f"{base_name}.{output}", results)


	@cli.command()
	@click.option("-k", "--keywords", required=True, help="keywords for query")
	@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
	@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
	@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
	def suggestions(keywords, region, output, proxy):
	    """CLI function to perform a suggestions search using DuckDuckGo API."""
	    client = WEBS(proxies=proxy)
	    results = client.suggestions(keywords=keywords, region=region)

	    safe_keywords = _sanitize_keywords(keywords)
	    base_name = f"suggestions_{safe_keywords}_{datetime.now():%Y%m%d_%H%M%S}"

	    # The output value doubles as the file extension for the file writers.
	    writers = {"csv": _save_csv, "json": _save_json}
	    if output == "print":
	        _print_data(results)
	    elif output in writers:
	        writers[output](f"{base_name}.{output}", results)


	# Direct execution of this module: run the click group, with the program
	# name shown in usage/help output set to "WEBS".
	if __name__ == "__main__":
	    cli(prog_name="WEBS")