Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -64,9 +64,6 @@ def parse_links_and_content(ort):
|
|
64 |
if target_div:
|
65 |
links = [urljoin(base_url, a['href']) for a in target_div.find_all('a', href=True)]
|
66 |
texts = [a.text for a in target_div.find_all('a', href=True)]
|
67 |
-
texts=texts.replace("| Amtsgericht: Schweinfurt", "")
|
68 |
-
texts=texts.replace("Adresse folgt", "")
|
69 |
-
texts=texts.replace("Adresse", "Adresse:")
|
70 |
all_links.extend(links)
|
71 |
all_links_text.extend(texts)
|
72 |
else:
|
@@ -101,9 +98,12 @@ def scrape_links(links):
|
|
101 |
|
102 |
# Extract the name of the Verein from the URL
|
103 |
vereinsname = parts[-1] if parts[-1] else parts[-2] # Fallback to the second-to-last part if the last part is empty
|
104 |
-
|
|
|
|
|
|
|
105 |
if target_nav:
|
106 |
-
details.append(f"Verein: {vereinsname} {
|
107 |
else:
|
108 |
details.append(f"Verein: {vereinsname} - No contact information found")
|
109 |
except Exception as e:
|
|
|
64 |
if target_div:
|
65 |
links = [urljoin(base_url, a['href']) for a in target_div.find_all('a', href=True)]
|
66 |
texts = [a.text for a in target_div.find_all('a', href=True)]
|
|
|
|
|
|
|
67 |
all_links.extend(links)
|
68 |
all_links_text.extend(texts)
|
69 |
else:
|
|
|
98 |
|
99 |
# Extract the name of the Verein from the URL
|
100 |
vereinsname = parts[-1] if parts[-1] else parts[-2] # Fallback to the second-to-last part if the last part is empty
|
101 |
+
texte = target_nav.text.strip()
|
102 |
+
texte=texte.replace("| Amtsgericht: Schweinfurt", "")
|
103 |
+
texte=texte.replace("Adresse folgt", "")
|
104 |
+
texte=texte.replace("Adresse", "Adresse:")
|
105 |
if target_nav:
|
106 |
+
details.append(f"Verein: {vereinsname} {texte}")
|
107 |
else:
|
108 |
details.append(f"Verein: {vereinsname} - No contact information found")
|
109 |
except Exception as e:
|