Laurine Sottani
committed on
Commit
·
0e831b4
1
Parent(s):
a171675
ui changes
Browse files
- webscraping_cleaning.py +7 -8
webscraping_cleaning.py
CHANGED
|
@@ -87,7 +87,7 @@ def process(url: str, out_name: str) -> str:
|
|
| 87 |
return out_path
|
| 88 |
|
| 89 |
with gr.Blocks(title="Web → Markdown") as demo:
|
| 90 |
-
gr.Markdown("# 🌐
|
| 91 |
with gr.Row():
|
| 92 |
with gr.Column():
|
| 93 |
url_in = gr.Textbox(label="URL à scraper")
|
|
@@ -100,13 +100,12 @@ with gr.Blocks(title="Web → Markdown") as demo:
|
|
| 100 |
gr.Markdown(
|
| 101 |
"""
|
| 102 |
---
|
| 103 |
-
|
| 104 |
-
-
|
| 105 |
-
-
|
| 106 |
-
-
|
| 107 |
-
-
|
| 108 |
-
|
| 109 |
-
Le web, débarrassé du superflu — il ne reste que l’essentiel. 🌱
|
| 110 |
"""
|
| 111 |
)
|
| 112 |
|
|
|
|
| 87 |
return out_path
|
| 88 |
|
| 89 |
with gr.Blocks(title="Web → Markdown") as demo:
|
| 90 |
+
gr.Markdown("# 🌐 Web Scraping — du web vers Markdown")
|
| 91 |
with gr.Row():
|
| 92 |
with gr.Column():
|
| 93 |
url_in = gr.Textbox(label="URL à scraper")
|
|
|
|
| 100 |
gr.Markdown(
|
| 101 |
"""
|
| 102 |
---
|
| 103 |
+
** Opérations effectuées :**
|
| 104 |
+
- Extraction ciblée du contenu utile (titre, texte, articles, etc.)
|
| 105 |
+
- Suppression automatique des menus, pubs et scripts indésirables
|
| 106 |
+
- Mise en forme fluide en Markdown lisible et propre
|
| 107 |
+
- Téléchargement immédiat du résultat
|
| 108 |
+
|
|
|
|
| 109 |
"""
|
| 110 |
)
|
| 111 |
|