Spaces:

gauthamnairy
/

PageIndexAPI

Running

App Files Files Community

gauthamnairy committed on Feb 8

Commit

385769a

verified ·

1 Parent(s): b163dc2

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -67

app.py CHANGED Viewed

@@ -1,67 +1,76 @@
-import gradio as gr
-import os
-from pageindex.core.tree_index import TreeIndex
-from llm_config import get_llm_client, get_model_name
-# Initialize clients (checking for environment variables)
-# We do this inside the function or globally, but for robustness inside function is safer if env vars change (less likely in HF Spaces)
-# However, initializing once is better for connection pooling if applicable. Let's do it inside for now to handle errors gracefully.
-def process_docling_and_chat(markdown_text, user_query):
-    if not markdown_text:
-        return "Please provide document markdown text."
-    if not user_query:
-        return "Please provide a query."
-    try:
-        # 1. Build the PageIndex Tree locally in the Space
-        tree = TreeIndex()
-        tree.build_from_markdown(markdown_text)
-        # 2. Initialize the Navigator (The "Brain")
-        # Try Nvidia first, then Mistral
-        try:
-            client = get_llm_client(provider="nvidia")
-            model = get_model_name(provider="nvidia")
-            # Test connection simply or just proceed
-        except Exception as e:
-            print(f"Nvidia client failed: {e}. Falling back to Mistral.")
-            client = get_llm_client(provider="mistral")
-            model = get_model_name(provider="mistral")
-        # 3. Perform Reasoning Search
-        # This uses the internal logic of the repo to navigate the tree
-        context = tree.reasoning_search(query=user_query, llm_client=client)
-        # 4. Final Answer Extraction
-        # Using the same client for consistency
-        response = client.chat.completions.create(
-            model=model,
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant. Use the provided context to answer the user's query."},
-                {"role": "user", "content": f"Context:\n{context}\n\nQuery: {user_query}"}
-            ]
-        )
-        return response.choices[0].message.content
-    except Exception as e:
-        return f"An error occurred: {str(e)}"
-# Gradio UI setup
-with gr.Blocks(title="Petromind AI - PageIndex RAG") as demo:
-    gr.Markdown("# Oil & Gas Report - PageIndex RAG")
-    gr.Markdown("Upload document content (markdown format) and ask questions to extract specific information using PageIndex reasoning.")
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_md = gr.Textbox(label="Paste Docling Markdown Here", lines=15, placeholder="# Document Title\n\n## Section 1\nContent...")
-        with gr.Column(scale=1):
-            query = gr.Textbox(label="What do you want to extract?", placeholder="e.g., What is the casing size?")
-            btn = gr.Button("Analyze", variant="primary")
-            output = gr.Textbox(label="Result", lines=10, interactive=False)
-    btn.click(fn=process_docling_and_chat, inputs=[input_md, query], outputs=output)
-if __name__ == "__main__":
-    # Enable queue for concurrency
-    demo.queue().launch(server_name="0.0.0.0", server_port=7860)

+import gradio as gr
+import os
+from pageindex.core.tree_index import TreeIndex
+from llm_config import get_llm_client, get_model_name
+# LLM clients are created inside the request handler rather than once at import time.
+# Creating them per request picks up environment-variable changes (unlikely on HF Spaces) and lets
+# a provider failure be handled gracefully; the trade-off is that connections are not pooled across requests.
+# Security: Check for APP_TOKEN env var. If not set, default to open access or a warning.
+# User provided specific token to use.
+REQUIRED_TOKEN = os.getenv("APP_TOKEN", "849ejdkf2Audjo2Jf3jdoirfjh")
+def process_docling_and_chat(markdown_text, user_query, token):
+    if token != REQUIRED_TOKEN:
+        return "Error: Invalid Authentication Token."
+    if not markdown_text:
+        return "Please provide document markdown text."
+    if not user_query:
+        return "Please provide a query."
+    try:
+        # 1. Build the PageIndex Tree locally in the Space
+        tree = TreeIndex()
+        tree.build_from_markdown(markdown_text)
+        # 2. Initialize the Navigator (The "Brain")
+        # Try Nvidia first, then Mistral
+        try:
+            client = get_llm_client(provider="nvidia")
+            model = get_model_name(provider="nvidia")
+            # Test connection simply or just proceed
+        except Exception as e:
+            print(f"Nvidia client failed: {e}. Falling back to Mistral.")
+            client = get_llm_client(provider="mistral")
+            model = get_model_name(provider="mistral")
+        # 3. Perform Reasoning Search
+        # This uses the internal logic of the repo to navigate the tree
+        context = tree.reasoning_search(query=user_query, llm_client=client)
+        # 4. Final Answer Extraction
+        # Using the same client for consistency
+        response = client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant. Use the provided context to answer the user's query."},
+                {"role": "user", "content": f"Context:\n{context}\n\nQuery: {user_query}"}
+            ]
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+# Gradio UI setup
+with gr.Blocks(title="Petromind AI - PageIndex RAG") as demo:
+    gr.Markdown("# Oil & Gas Report - PageIndex RAG")
+    gr.Markdown("Upload document content (markdown format) and ask questions to extract specific information using PageIndex reasoning.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_md = gr.Textbox(label="Paste Docling Markdown Here", lines=15, placeholder="# Document Title\n\n## Section 1\nContent...")
+        with gr.Column(scale=1):
+            query = gr.Textbox(label="What do you want to extract?", placeholder="e.g., What is the casing size?")
+            token_input = gr.Textbox(label="API Token", placeholder="Enter access token", type="password")
+            btn = gr.Button("Analyze", variant="primary")
+            output = gr.Textbox(label="Result", lines=10, interactive=False)
+    btn.click(fn=process_docling_and_chat, inputs=[input_md, query, token_input], outputs=output)
+if __name__ == "__main__":
+    # Enable queue for concurrency
+    demo.queue().launch(server_name="0.0.0.0", server_port=7860)