import html
import json
import re
import unicodedata
import urllib.parse
from collections import Counter

import gradio as gr
import requests
import xmltodict


def normalize_keyword(keyword):
    """Return *keyword* lower-cased with its diacritics removed.

    The text is decomposed with NFD so that each accent becomes a separate
    combining mark (Unicode category ``Mn``); those marks are then dropped.
    Values that are not strings are handed back untouched.
    """
    if not isinstance(keyword, str):
        return keyword
    decomposed = unicodedata.normalize('NFD', keyword.lower())
    kept = [char for char in decomposed if unicodedata.category(char) != 'Mn']
    return ''.join(kept)


def fetch_google_suggestions(query, lang_code="es", client="firefox"):
    """Fetch autocomplete suggestions from Google's suggest endpoint.

    Args:
        query: Search text to complete.
        lang_code: Value sent as both the ``hl`` and ``gl`` URL parameters.
        client: Value sent as the ``client`` URL parameter.

    Returns:
        A list of normalized suggestion strings. The list is empty when the
        request fails, the status is not 200, or the payload is not the
        expected ``[query, [suggestion, ...], ...]`` JSON array.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client={client}&hl={lang_code}&gl={lang_code}&q={encoded_query}"
    try:
        # Without a timeout a single stalled connection blocks the caller forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando Google: {exc}")
        return []
    if response.status_code != 200:
        return []
    try:
        payload = response.json()
    except ValueError:
        print("Error decodificando JSON de Google")
        return []
    # Validate the shape up front instead of letting payload[1] raise.
    if not isinstance(payload, list) or len(payload) < 2 or not isinstance(payload[1], list):
        return []
    return [normalize_keyword(suggestion) for suggestion in payload[1] if isinstance(suggestion, str)]


def fetch_brave_suggestions(query, lang_code="es"):
    """Fetch autocomplete suggestions from Brave Search.

    Args:
        query: Search text to complete.
        lang_code: Accepted for signature parity with the other fetchers;
            it is not sent in the request.

    Returns:
        A list of normalized suggestion strings, each appearing once per
        occurrence in the response. Empty on network, HTTP or parsing
        problems.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://search.brave.com/api/suggest?q={encoded_query}"
    try:
        # Without a timeout a single stalled connection blocks the caller forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando Brave: {exc}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de Brave")
        return []
    # isinstance(data, list) guards against a dict payload, where data[1]
    # would raise KeyError instead of failing the shape test.
    if not (isinstance(data, list) and len(data) > 1 and isinstance(data[1], list)):
        print("No se encontraron sugerencias en el formato esperado (Brave).")
        return []
    # The previous inner loop ran range(int(0.35 * 5)) == range(1), i.e. exactly
    # once per item, so emitting each item a single time is equivalent.
    # Non-string items are skipped: they cannot be normalized and unhashable
    # ones would break the counting done by callers.
    return [normalize_keyword(item) for item in data[1] if isinstance(item, str)]


def fetch_qwant_suggestions(query, lang_code="es"):
    """Fetch autocomplete suggestions from Qwant's OpenSearch endpoint.

    Args:
        query: Search text to complete.
        lang_code: Accepted for signature parity with the other fetchers;
            it is not sent in the request.

    Returns:
        A list of normalized suggestion strings; empty on network, HTTP or
        parsing problems.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://api.qwant.com/api/suggest/?client=opensearch&q={encoded_query}"
    try:
        # Without a timeout a single stalled connection blocks the caller forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando Qwant: {exc}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de Qwant")
        return []
    # isinstance(data, list) guards against a dict payload, where data[1]
    # would raise KeyError instead of failing the shape test.
    if not (isinstance(data, list) and len(data) > 1 and isinstance(data[1], list)):
        print("No se encontraron sugerencias en el formato esperado (Qwant).")
        return []
    # Skip non-string items: unhashable ones would break counting in callers.
    return [normalize_keyword(item) for item in data[1] if isinstance(item, str)]


def fetch_duckduckgo_suggestions(query, lang_code="es"):
    """Fetch autocomplete suggestions from DuckDuckGo's ``/ac/`` endpoint.

    Args:
        query: Search text to complete.
        lang_code: Value sent as the ``kl`` URL parameter.

    Returns:
        A list of normalized suggestion strings taken from the ``phrase``
        field of each response item; empty on network, HTTP or parsing
        problems.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://duckduckgo.com/ac/?q={encoded_query}&kl={lang_code}"
    try:
        # Without a timeout a single stalled connection blocks the caller forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando DuckDuckGo: {exc}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de DuckDuckGo")
        return []
    if not isinstance(data, list):
        return []
    # Items without a string 'phrase' are skipped rather than raising
    # KeyError/TypeError out of the comprehension.
    return [
        normalize_keyword(item['phrase'])
        for item in data
        if isinstance(item, dict) and isinstance(item.get('phrase'), str)
    ]


def fetch_youtube_suggestions(query, lang_code="es"):
    """Fetch YouTube autocomplete suggestions via suggestqueries.google.com.

    The endpoint answers with JSONP: ``window.google.ac.h(<json>)``. The JSON
    argument is parsed as a whole, so every suggestion entry is returned and
    ``\\uXXXX`` escapes are decoded.

    Args:
        query: Search text to complete.
        lang_code: Value sent as the ``hl`` URL parameter.

    Returns:
        A list of normalized suggestion strings; empty on network, HTTP or
        parsing problems.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"http://suggestqueries.google.com/complete/search?client=youtube&hl={lang_code}&q={encoded_query}"
    try:
        # Without a timeout a single stalled connection blocks the caller forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        print(f"Error procesando la respuesta de YouTube (antiguo): {e}")
        return []
    if response.status_code != 200:
        return []

    # Greedy capture up to the last ')' yields the full JSON argument. The
    # earlier lazy pattern '(.*?)\],' stopped at the first '],' and therefore
    # only ever saw the first suggestion entry.
    match = re.search(r'window\.google\.ac\.h\((.*)\)', response.text, re.DOTALL)
    if not match:
        print("No se encontraron sugerencias en el formato esperado (YouTube antiguo).")
        return []
    try:
        payload = json.loads(match.group(1))
    except ValueError as e:
        print(f"Error procesando la respuesta de YouTube (antiguo): {e}")
        return []

    entries = payload[1] if isinstance(payload, list) and len(payload) > 1 else None
    if not isinstance(entries, list):
        print("No se encontraron sugerencias en el formato esperado (YouTube antiguo).")
        return []

    suggestions = []
    for entry in entries:
        # Each entry is expected to be a list whose first element is the text;
        # a bare string is accepted as well.
        text = entry[0] if isinstance(entry, list) and entry else entry
        if isinstance(text, str) and text:
            suggestions.append(normalize_keyword(text))
    return suggestions


def fetch_youtube_suggestions_new(query, lang_code="es"):
    """Fetch YouTube autocomplete suggestions via suggestqueries-clients6.

    The endpoint answers with JSONP: ``window.google.ac.h(<json>)``. The JSON
    argument is parsed as a whole, so every suggestion entry is returned and
    ``\\uXXXX`` escapes are decoded.

    Args:
        query: Search text to complete.
        lang_code: Value sent as the ``hl`` URL parameter.

    Returns:
        A list of normalized suggestion strings; empty on network, HTTP or
        parsing problems.
    """
    encoded_query = urllib.parse.quote(query)
    url = f"https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&ds=yt&q={encoded_query}&hl={lang_code}"
    try:
        # Without a timeout a single stalled connection blocks the caller forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        print(f"Error procesando la respuesta de la nueva API de YouTube: {e}")
        return []
    if response.status_code != 200:
        return []

    # Greedy capture up to the last ')' yields the full JSON argument. The
    # earlier lazy pattern '(.*?)\],' stopped at the first '],' and therefore
    # only ever saw the first suggestion entry.
    match = re.search(r'window\.google\.ac\.h\((.*)\)', response.text, re.DOTALL)
    if not match:
        print("No se encontraron sugerencias en el formato esperado (nueva API de YouTube).")
        return []
    try:
        payload = json.loads(match.group(1))
    except ValueError as e:
        print(f"Error procesando la respuesta de la nueva API de YouTube: {e}")
        return []

    entries = payload[1] if isinstance(payload, list) and len(payload) > 1 else None
    if not isinstance(entries, list):
        print("No se encontraron sugerencias en el formato esperado (nueva API de YouTube).")
        return []

    suggestions = []
    for entry in entries:
        # Each entry is expected to be a list whose first element is the text;
        # a bare string is accepted as well.
        text = entry[0] if isinstance(entry, list) and entry else entry
        if isinstance(text, str) and text:
            suggestions.append(normalize_keyword(text))
    return suggestions


def fetch_bing_suggestions(query, market="en-US"):
    """Fetch autocomplete suggestions from Bing's ``qsml.aspx`` XML endpoint.

    Args:
        query: Search text to complete.
        market: Value sent as the ``Market`` request parameter.

    Returns:
        A list of normalized suggestion strings. Always a list: it is empty
        on network, HTTP or parsing problems and when the response carries
        no usable ``Item`` entries.
    """
    url = "https://api.bing.com/qsml.aspx"
    params = {"Market": market, "query": query}
    headers = {"User-agent": "Mozilla/5.0"}
    try:
        # Without a timeout a single stalled connection blocks the caller forever.
        response = requests.get(url, params=params, headers=headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error procesando la respuesta de Bing: {e}")
        return []
    if response.status_code != 200:
        return []

    try:
        obj = xmltodict.parse(response.content)
        suggestions = obj['SearchSuggestion']['Section']['Item']
    except Exception as e:
        # Deliberately broad best-effort: malformed XML and missing or None
        # nodes are all reported the same way.
        print(f"Error procesando la respuesta de Bing: {e}")
        return []

    # xmltodict yields a dict for a single <Item> and a list for several.
    if isinstance(suggestions, dict):
        suggestions = [suggestions]
    if not isinstance(suggestions, list):
        # Previously this case fell off the end and returned None, which
        # broke callers that extend a list with the result.
        return []
    return [
        normalize_keyword(s['Text'])
        for s in suggestions
        if isinstance(s, dict) and isinstance(s.get('Text'), str)
    ]


def fetch_amazon_suggestions(query, market_id="ATVPDKIKX0DER", alias="aps"):
    """Fetch autocomplete suggestions from Amazon's completion API.

    Args:
        query: Search prefix to complete (sent as ``prefix``).
        market_id: Value sent as the ``mid`` request parameter.
        alias: Value sent as the ``alias`` request parameter.

    Returns:
        A list of normalized suggestion strings taken from the ``value``
        field of each entry under ``suggestions``; empty on network, HTTP or
        parsing problems.
    """
    url = "https://completion.amazon.com/api/2017/suggestions"
    params = {"mid": market_id, "alias": alias, "prefix": query}
    try:
        # Without a timeout a single stalled connection blocks the caller forever.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Error de red consultando Amazon: {exc}")
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        print("Error decodificando JSON de Amazon")
        return []

    items = data.get('suggestions', []) if isinstance(data, dict) else []
    if not isinstance(items, list):
        return []
    # Entries without a string 'value' are skipped rather than raising
    # KeyError/TypeError out of the comprehension.
    return [
        normalize_keyword(item['value'])
        for item in items
        if isinstance(item, dict) and isinstance(item.get('value'), str)
    ]


def expand_keyword(keyword):
    """Build the query variants used to widen autocomplete coverage.

    The result starts with *keyword* itself, followed, for each of the 26
    lowercase ASCII letters plus ``*`` and ``_``, by the keyword with that
    character appended and then prepended (separated by one space).
    """
    affixes = 'abcdefghijklmnopqrstuvwxyz*_'
    variants = [keyword]
    for affix in affixes:
        variants.extend((keyword + " " + affix, affix + " " + keyword))
    return variants


def get_top_suggestions(suggestions, top_n=10):
    """Return the *top_n* most frequent items as ``(item, count)`` pairs.

    Pairs are ordered from most to least frequent, as produced by
    ``Counter.most_common``.
    """
    tally = Counter()
    tally.update(suggestions)
    return tally.most_common(top_n)


def _fetch_safely(fetch, *args, **kwargs):
    """Call *fetch* and return its string results, or [] if it fails."""
    try:
        found = fetch(*args, **kwargs) or []
    except Exception as exc:
        # Boundary handler: one failing engine must not discard the results
        # already gathered from all the others.
        print(f"Error consultando {fetch.__name__}: {exc}")
        return []
    # Only strings can be ranked and rendered; unhashable values would break
    # the dict/Counter bookkeeping below.
    return [item for item in found if isinstance(item, str)]


def _register_suggestions(registry, suggestions, source):
    """Add one hit per suggestion to *registry* and tag it with *source*."""
    for suggestion in suggestions:
        entry = registry.setdefault(suggestion, {"count": 0, "sources": set()})
        entry["count"] += 1
        entry["sources"].add(source)


def _render_ranked_list(title, ranked):
    """Render ``(suggestion, frequency)`` pairs as an escaped <h4>/<ul> section."""
    items = ''.join(f'<li>{html.escape(str(sug))} ({freq})</li>' for sug, freq in ranked)
    return f"<h4>{title}</h4>\n<ul>{items}</ul>"


def main(keyword):
    """Collect suggestions for *keyword* from every engine and render them as HTML.

    The keyword is expanded with ``expand_keyword``; each variant is sent to
    Google (once per client flavour), DuckDuckGo, YouTube (both endpoints),
    Bing, Amazon, Qwant and Brave. Suggestions are ranked by the number of
    distinct sources that returned them and then by total hits.

    Args:
        keyword: Seed search term typed by the user.

    Returns:
        An HTML fragment with the combined top 10, a top 3 per engine, and a
        table of every suggestion. All remote text is HTML-escaped.
    """
    brave_weight = 0.4
    google_clients = ["firefox", "chrome", "android", "desktop", "opera", "safari"]
    # Order matters: it fixes the insertion order of the registry, which breaks
    # ties in the stable sort below.
    other_engines = (
        ("DuckDuckGo", (fetch_duckduckgo_suggestions,)),
        ("YouTube", (fetch_youtube_suggestions, fetch_youtube_suggestions_new)),
        ("Bing", (fetch_bing_suggestions,)),
        ("Amazon", (fetch_amazon_suggestions,)),
        ("Qwant", (fetch_qwant_suggestions,)),
        ("Brave", (fetch_brave_suggestions,)),
    )

    expanded_keywords = expand_keyword(keyword)
    registry = {}
    collected = {"Google": []}
    collected.update((label, []) for label, _ in other_engines)

    # Each Google client flavour is recorded as its own source.
    for exp_keyword in expanded_keywords:
        for client in google_clients:
            found = _fetch_safely(fetch_google_suggestions, exp_keyword, client=client)
            collected["Google"].extend(found)
            _register_suggestions(registry, found, f"Google ({client})")

    for label, fetchers in other_engines:
        for exp_keyword in expanded_keywords:
            for fetch in fetchers:
                found = _fetch_safely(fetch, exp_keyword)
                collected[label].extend(found)
                _register_suggestions(registry, found, label)

    combined_suggestions = sorted(
        registry.items(),
        key=lambda item: (len(item[1]["sources"]), item[1]["count"]),
        reverse=True,
    )

    top_3 = {label: get_top_suggestions(found, top_n=3) for label, found in collected.items()}
    top_3["Brave"] = [(sug, int(freq * brave_weight)) for sug, freq in top_3["Brave"]]

    rows = []
    for suggestion, data in combined_suggestions:
        # NOTE(review): the 0.4 factor scales the whole total (hits from every
        # engine) whenever Brave is among the sources, while the ranking and
        # the top-10 list use the unscaled count. Kept as-is; confirm intent.
        if 'Brave' in data['sources']:
            count = int(data['count'] * brave_weight)
        else:
            count = data['count']
        # sorted(): set iteration order is not stable between runs.
        sources = ', '.join(sorted(data['sources']))
        rows.append(f"<tr><td>{html.escape(suggestion)}</td><td>{sources}</td><td>{count}</td></tr>")
    all_suggestions_str = (
        "<table><tr><th>Keyword</th><th>Buscadores</th><th>Relevancia (Total)</th></tr>"
        + ''.join(rows)
        + "</table>"
    )

    top_10_items = ''.join(
        f'<li>{html.escape(sug)} (en {len(data["sources"])} buscadores, {data["count"]} repeticiones)</li>'
        for sug, data in combined_suggestions[:10]
    )

    sections = [
        "<div>",
        "<h3>Top 10 combinadas (basado en la cantidad de buscadores y repeticiones):</h3>",
        f"<ul>{top_10_items}</ul>",
    ]
    for label in collected:
        sections.append(_render_ranked_list(f"Top 3 Sugerencias de {label}:", top_3[label]))
    sections.append("<h4>Tabla completa de palabras clave y su relevancia:</h4>")
    sections.append(all_suggestions_str)
    sections.append("</div>")
    return "\n".join(sections)


# Gradio front end: one text box in, the HTML report produced by main() out.
# The title and description previously contained mis-decoded characters in
# place of the accented "u"; they are restored here.
iface = gr.Interface(
    fn=main,
    inputs="text",
    outputs=gr.HTML(),
    title="Sugerencias Combinadas de Múltiples Motores de Búsqueda",
    description="Ingrese una palabra clave para obtener sugerencias de búsqueda relacionadas de Google (Firefox, Chrome, Android, Desktop, Opera, Safari), DuckDuckGo, YouTube, Bing, Amazon, Qwant y Brave.",
)

# Started at import time, as before, so existing ways of running the app keep working.
iface.launch()