Spaces:

User111-ops
/

search-app

Runtime error

App Files Files Community

User111-ops committed on Jun 20

Commit

44809e2

verified ·

1 Parent(s): e51aca9

Upload 3 files

Browse files

Files changed (3) hide show

README.md +10 -6
app.py +91 -0
requirements.txt +4 -0

README.md CHANGED Viewed

@@ -1,12 +1,16 @@
 ---
-title: Search App
-emoji: 🔥
-colorFrom: green
-colorTo: purple
 sdk: gradio
-sdk_version: 5.34.2
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Haystack Search App
+emoji: 🔍
+colorFrom: gray
+colorTo: blue
 sdk: gradio
+sdk_version: "3.50.2"
 app_file: app.py
 pinned: false
 ---
+# 🔍 Haystack Search App
+Cette application vous permet de rechercher intelligemment des passages pertinents à partir de fichiers PDF, DOCX et TXT.
+Déposez vos fichiers, entrez un mot-clé ou une question, et l’IA (Haystack + RoBERTa) retrouve les extraits utiles.

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import os
+import shutil
+import tempfile
+import uuid
+import gradio as gr
+from haystack.document_stores import InMemoryDocumentStore
+from haystack.nodes import FARMReader, PreProcessor, TextConverter, PDFToTextConverter, DocxToTextConverter
+from haystack.nodes import BM25Retriever
+from haystack.pipelines import ExtractiveQAPipeline
+# Uploaded files are staged in a dedicated folder under the system temp directory.
+UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "hf_uploads")
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+# BM25Retriever can only query an InMemoryDocumentStore that maintains a BM25
+# index, and that index is only built when use_bm25=True (it defaults to False).
+document_store = InMemoryDocumentStore(use_bm25=True)
+retriever = BM25Retriever(document_store=document_store)
+# Extractive reader, run on CPU.
+reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)
+pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever)
+# Cleans converted text and splits it into chunks of 200 words,
+# keeping sentences whole.
+preprocessor = PreProcessor(
+    clean_empty_lines=True,
+    clean_whitespace=True,
+    clean_header_footer=True,
+    split_by="word",
+    split_length=200,
+    split_respect_sentence_boundary=True,
+)
+# Maps a lower-cased file extension to the converter that extracts its text;
+# files with any other extension are ignored by the app.
+converters = {
+    ".pdf": PDFToTextConverter(remove_numeric_tables=True),
+    ".txt": TextConverter(),
+    ".docx": DocxToTextConverter(),
+}
+def clear_files():
+    """Delete every uploaded file and leave an empty UPLOAD_DIR behind.
+
+    Returns:
+        A confirmation message shown in the UI.
+    """
+    # The temp folder may already be gone (e.g. cleaned up externally);
+    # clearing must still succeed instead of raising FileNotFoundError.
+    shutil.rmtree(UPLOAD_DIR, ignore_errors=True)
+    os.makedirs(UPLOAD_DIR, exist_ok=True)
+    return "📁 Tous les fichiers ont été supprimés."
+def add_files(files):
+    """Copy the uploaded files that have a supported extension into UPLOAD_DIR.
+
+    Args:
+        files: Value of the multi-file `gr.File` input; each item exposes the
+            path of its uploaded copy as `.name`. May be None or empty when
+            nothing was selected.
+
+    Returns:
+        A message with the number of files actually stored.
+    """
+    # Nothing selected: report zero instead of failing on iteration/len().
+    if not files:
+        return "0 fichier(s) ajouté(s)."
+    os.makedirs(UPLOAD_DIR, exist_ok=True)
+    added = 0
+    for file in files:
+        # NOTE(review): with gradio 3.x `file.name` is the absolute path of the
+        # temporary upload. Joining it to UPLOAD_DIR as-is would yield that same
+        # absolute path, so only the base name is used for the destination.
+        filename = os.path.basename(file.name)
+        ext = os.path.splitext(filename)[-1].lower()
+        if ext not in converters:
+            continue
+        # Copy from the path on disk rather than reading the file object.
+        shutil.copyfile(file.name, os.path.join(UPLOAD_DIR, filename))
+        added += 1
+    # Count only the files that were stored, not the unsupported ones skipped.
+    return f"{added} fichier(s) ajouté(s)."
+def search_keyword(query):
+    """Index the files in UPLOAD_DIR and return the passages matching `query`.
+
+    The document store is emptied and refilled on every call, then the
+    extractive QA pipeline is run with 10 retrieved documents and 5 answers.
+
+    Args:
+        query: Keyword or question typed by the user.
+
+    Returns:
+        A Markdown string with, for each answer, its context, file name and
+        file path; or a short message when the query is empty or nothing
+        was found.
+    """
+    if not query or not query.strip():
+        return "⚠️ Entrez un mot-clé."
+    docs = []
+    for root, _, files in os.walk(UPLOAD_DIR):
+        for file in files:
+            path = os.path.join(root, file)
+            ext = os.path.splitext(file)[-1].lower()
+            converter = converters.get(ext)
+            if converter:
+                # convert() already returns a list of Documents; pass it to
+                # process() directly instead of wrapping it in another list.
+                converted = converter.convert(file_path=path, meta={"name": file, "path": path})
+                docs.extend(preprocessor.process(converted))
+    document_store.delete_documents()
+    # No supported file (or no extractable text): skip indexing and querying.
+    if not docs:
+        return "Aucun passage trouvé."
+    document_store.write_documents(docs)
+    prediction = pipeline.run(query=query, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}})
+    answers = prediction["answers"]
+    results = []
+    for ans in answers:
+        # context and meta are optional on an answer; fall back to empty values.
+        context = (ans.context or "").strip()
+        meta = ans.meta or {}
+        results.append(f"**Extrait :** {context}\n\n**Fichier :** {meta.get('name')}\n**Chemin :** {meta.get('path')}\n\n---")
+    return "\n".join(results) if results else "Aucun passage trouvé."
+# User interface: components are created in display order, then each button
+# is wired to its handler; every handler writes into the same Markdown output.
+with gr.Blocks() as demo:
+    gr.Markdown("# 🔍 Recherche intelligente dans vos documents (.pdf, .txt, .docx)")
+    file_input = gr.File(
+        label="Ajoutez vos fichiers ici",
+        file_count="multiple",
+        file_types=[".pdf", ".txt", ".docx"],
+    )
+    upload_btn = gr.Button("📁 Ajouter les fichiers")
+    clear_btn = gr.Button("🗑️ Vider les fichiers")
+    query = gr.Textbox(
+        label="Mot-clé ou question",
+        placeholder="Tapez un mot-clé ici...",
+    )
+    search_btn = gr.Button("🔎 Rechercher")
+    output = gr.Markdown()
+
+    # Event wiring.
+    upload_btn.click(fn=add_files, inputs=file_input, outputs=output)
+    clear_btn.click(fn=clear_files, outputs=output)
+    search_btn.click(fn=search_keyword, inputs=query, outputs=output)
+
+# Start the app only when the file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+farm-haystack==1.18.0
+pandas<2.0.0
+pydantic==1.10.13
+gradio==3.50.2