import gradio as gr
import requests
from bs4 import BeautifulSoup
from gradio_client import Client
from urllib.parse import urljoin
import pandas as pd
from io import StringIO
import json
import groq
import os

# API keys are read from environment variables (Space secrets).
google_api_key = os.getenv('google_search')

# Flowise prediction endpoint used by query() below.
API_URL = "https://blavken-flowiseblav.hf.space/api/v1/prediction/fbc118dc-ec00-4b59-acff-600648958be3"

# Groq client used by the llm() helper.
api_key = os.getenv('groq')
client = groq.Client(api_key=api_key)
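# Note: besides the two secrets above, the app calls several public Spaces via
# gradio_client (mgokg/PerplexicaApi, mgokg/selenium-screenshot-gradio,
# Qwen/Qwen2.5-72B-Instruct, mgokg/gemini-2.0-flash-exp); if one of them is
# unavailable, the corresponding helper below will raise an error.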
custom_css = """ | |
#md { | |
height: 200px; | |
font-size: 30px; | |
background: #121212; | |
padding: 20px; | |
color: white; | |
border: 1 px solid white; | |
font-size:10px; | |
} | |
""" | |
def perplexica_search(payloads):
    # Query the PerplexicaApi Space via the Gradio client.
    client = Client("mgokg/PerplexicaApi")
    result = client.predict(
        prompt=f"{payloads}",
        optimization_mode="balanced",
        api_name="/question"
    )
    return result
def query(payload):
    # Send a prediction request to the Flowise endpoint and return its JSON reply.
    response = requests.post(API_URL, json=payload)
    return response.json()
def google_search(payloads):
    # Ask the Flowise flow a question and return its JSON answer unchanged.
    output = query({
        "question": f"{payloads}",
    })
    #search_query = f"{payloads} antworte kurz und knapp. antworte auf deutsch. du findest die antwort hier:\n {output}"
    return output
scheme = """ | |
{"name":"","email":"","website":""} | |
""" | |
def llama(messages):
    # Ask the selenium-screenshot-gradio Space to look up details for one club.
    client = Client("mgokg/selenium-screenshot-gradio")
    result = client.predict(
        message=f"{messages}",
        api_name="/predict"
    )
    return result
    # Note: the block below is unreachable (it follows the return above); it is an
    # alternative that asks the AiActivity/AI-Assistant Space to fill the JSON scheme.
    client = Client("AiActivity/AI-Assistant")
    result = client.predict(
        message={"text": f"instruction: return a valid json object only, no comments or explanation, fill in the missing information. use this json scheme.\n {scheme}\n leave blank if information is not available. here is the information for the values:\n{messages}", "files": []},
        api_name="/chat"
    )
    print(result)
def llm(message):
    # Ask Groq's Llama 3 70B model to normalize the scraped details into JSON.
    message = f'return a json object with the keys: name,email,phone,website \n the values can be found here, leave blank if value is not available:\n {message} \n return a json object only. no text, no explanation'
    try:
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"{message}"}
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"
def qwen(jsondata):
    # Ask the Qwen2.5-72B-Instruct Space to return one JSON object per verein.
    client = Client("Qwen/Qwen2.5-72B-Instruct")
    result = client.predict(
        query=f'return a json object with the keys: name,email,phone,website for each verein \n the values can be found here, leave blank if value is not available:\n {jsondata} \n return a json object only. no text, no explanation',
        history=[],
        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        api_name="/model_chat"
    )
    return result
def list_of_clubs(ort):
    # Scrape vereine-in-deutschland.net for all club names in the given Bavarian town.
    base_url = "https://vereine-in-deutschland.net"
    all_links_text = []
    initial_url = f"{base_url}/vereine/Bayern/{ort}"

    try:
        response = requests.get(initial_url)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Determine the last page from the pagination widget; fall back to 10 pages.
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        last_page = 10
        if link_element and 'href' in link_element.attrs:
            href = link_element['href']
            last_page = int(href.split('/')[-1])

        # Loop through all pages and collect the club links.
        for page_number in range(1, last_page + 1):
            page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
            response = requests.get(page_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            target_div = soup.select_one('div.row-cols-1:nth-child(4)')
            if target_div:
                texts = [a.text for a in target_div.find_all('a', href=True)]
                all_links_text.extend(texts)
            else:
                print(f"Target div not found on page {page_number}")

    except Exception as e:
        print(f"Error while scraping club list: {e}")
        return []

    # Each club is linked twice per page, so keep every second entry only.
    all_links_text = all_links_text[0::2]
    return all_links_text
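# Assumed usage example: list_of_clubs("Regensburg") would return the club names
# scraped from vereine-in-deutschland.net/vereine/Bayern/Regensburg; the argument
# must match the town slug used in the site's URLs ("Regensburg" is illustrative).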
def process_ort(ort):
    # Collect all club names for the given town, then look up each club's details.
    links_text = list_of_clubs(ort)
    #return links_text
    vereine = []
    for verein in links_text:
        prompt = f"{verein}"
        result = llama(prompt)
        vereine.append(result)
        print(result)
    #data = json.loads(vereine)
    #df = pd.DataFrame(vereine)
    return vereine
    # Note: everything below is unreachable (it follows the return above); it is an
    # alternative that searches "impressum <verein>" via the gemini-2.0-flash-exp Space.
    for verein in links_text:
        client = Client("mgokg/gemini-2.0-flash-exp")
        result = client.predict(
            prompt=f"impressum {verein}",
            api_name="/perform_search"
        )
        #json_object = llm(result)
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
        url = f"https://www.google.com/search?q=impressum {verein}"
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        impressum_div = soup.find('body')
        contact_detailes = impressum_div.text
        json_object = llm(contact_detailes)
        """
        vereine.append(result)
    #dicts = [json.loads(item) for item in vereine]
    #df = pd.DataFrame(dicts)
    #return df
    return vereine
# Create the Gradio interface
with gr.Blocks(css=custom_css) as demo:
    with gr.Row():
        #details_output = gr.DataFrame(label="Ausgabe", elem_id="md")
        details_output = gr.Textbox(label="Ausgabe")
    with gr.Row():
        ort_input = gr.Textbox(label="Ort eingeben", placeholder="ask anything...")
    with gr.Row():
        button = gr.Button("Senden")

    # Connect the button to the function
    button.click(fn=process_ort, inputs=ort_input, outputs=details_output)

# Launch the Gradio application
demo.launch()