Laurine Sottani
committed on
Commit
·
0570e44
1
Parent(s):
6997817
fix requirements and update
Browse files
- requirements.txt +1 -1
- webscraping_cleaning.py +13 -1
requirements.txt
CHANGED
|
@@ -1,2 +1,2 @@
|
|
| 1 |
-
|
| 2 |
readability
|
|
|
|
| 1 |
+
markdownify
|
| 2 |
readability
|
webscraping_cleaning.py
CHANGED
|
@@ -87,7 +87,7 @@ def process(url: str, out_name: str) -> str:
|
|
| 87 |
return out_path
|
| 88 |
|
| 89 |
with gr.Blocks(title="Web → Markdown") as demo:
|
| 90 |
-
gr.Markdown("# 🌐
|
| 91 |
with gr.Row():
|
| 92 |
with gr.Column():
|
| 93 |
url_in = gr.Textbox(label="URL à scraper")
|
|
@@ -97,5 +97,17 @@ with gr.Blocks(title="Web → Markdown") as demo:
|
|
| 97 |
file_out = gr.File(label="Fichier Markdown")
|
| 98 |
btn.click(fn=process, inputs=[url_in, out_name], outputs=file_out)
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
if __name__ == "__main__":
|
| 101 |
demo.launch(share=True)
|
|
|
|
| 87 |
return out_path
|
| 88 |
|
| 89 |
with gr.Blocks(title="Web → Markdown") as demo:
|
| 90 |
+
gr.Markdown("# 🌐 🌐 WebDetox — le soin purifiant du web vers Markdown")
|
| 91 |
with gr.Row():
|
| 92 |
with gr.Column():
|
| 93 |
url_in = gr.Textbox(label="URL à scraper")
|
|
|
|
| 97 |
file_out = gr.File(label="Fichier Markdown")
|
| 98 |
btn.click(fn=process, inputs=[url_in, out_name], outputs=file_out)
|
| 99 |
|
| 100 |
+
gr.Markdown(
|
| 101 |
+
"""
|
| 102 |
+
---
|
| 103 |
+
**💆‍♂️ Cure WebDetox :**
|
| 104 |
+
- 🌐 Extraction ciblée du contenu utile (titre, texte, articles, etc.)
|
| 105 |
+
- 🧽 Suppression automatique des menus, pubs et scripts indésirables
|
| 106 |
+
- ✨ Mise en forme fluide en Markdown lisible et propre
|
| 107 |
+
- 💾 Téléchargement immédiat du résultat
|
| 108 |
+
Le web, débarrassé du superflu — il ne reste que l’essentiel. 🌱
|
| 109 |
+
"""
|
| 110 |
+
)
|
| 111 |
+
|
| 112 |
if __name__ == "__main__":
|
| 113 |
demo.launch(share=True)
|