Spaces:
Build error
Build error
import json
import os
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
from gradio_client import Client

#import pandas as pd
#from io import StringIO
#import groq
def list_of_clubs(ort):
    """Scrape the club (Verein) names for a Bavarian town from
    vereine-in-deutschland.net.

    Args:
        ort: Town name exactly as it appears in the site's URL path.

    Returns:
        A list of club-name strings. On an HTTP/parsing error a
        one-element list containing the error message is returned, so
        the return type is always a list (the previous code returned a
        ``(str, list)`` tuple on error, inconsistent with the success
        path).
    """
    base_url = "https://vereine-in-deutschland.net"
    all_links_text = []
    initial_url = f"{base_url}/vereine/Bayern/{ort}"
    try:
        # timeout prevents a stalled server from hanging the app forever
        response = requests.get(initial_url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Determine the last page from the pagination widget; fall back
        # to 10 pages if the selector does not match.
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        last_page = 10
        if link_element and 'href' in link_element.attrs:
            href = link_element['href']
            last_page = int(href.split('/')[-1])
        # Loop through all pages and collect the link texts.
        for page_number in range(1, last_page + 1):
            page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
            response = requests.get(page_url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')
            if target_div:
                texts = [a.text for a in target_div.find_all('a', href=True)]
                all_links_text.extend(texts)
            else:
                print(f"Target div not found on page {page_number}")
    except Exception as e:
        # Keep the return type consistent (always a list).
        return [str(e)]
    # Each club apparently yields two consecutive links on the page
    # (name + detail); keep every other entry to de-duplicate.
    # NOTE(review): assumption based on the [0::2] step — verify against
    # the live page layout.
    all_links_text = all_links_text[0::2]
    return all_links_text
def process_ort(ort):
    """Gradio handler: return the list of clubs found for *ort*."""
    return list_of_clubs(ort)
# Create the Gradio interface.
iface = gr.Interface(
    fn=process_ort,
    inputs=gr.Textbox(lines=1, placeholder="Ort eingeben..."),
    outputs=gr.Textbox(),
    title="vereine",
    description="VereineFinder",
)

# Launch only when executed as a script, so importing this module
# (e.g. from tests or tooling) does not start the server.
if __name__ == "__main__":
    iface.launch()