Spaces:
Running
Running
Commit
·
483c169
0
Parent(s):
Initial commit for LLM-Web-Scrapper project.
Browse files- .env.example +3 -0
- .gitignore +34 -0
- Dockerfile +21 -0
- LICENSE +21 -0
- README.md +10 -0
- app.py +107 -0
- config.py +32 -0
- crawl4ai_client.py +0 -0
- docker-compose.dev.yml +17 -0
- docker-compose.yml +16 -0
- firecrawl_client.py +34 -0
- llm_inference_service.py +40 -0
- requirements.txt +21 -0
.env.example
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GOOGLE_API_KEY="YOUR-GEMINI-API-KEY"
|
| 2 |
+
NVIDIA_API_KEY="YOUR-NVIDIA-API-KEY"
|
| 3 |
+
FIRECRAWL_API_KEY="YOUR-FIRECRAWL-API-KEY"
|
.gitignore
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python artifacts
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.py[cod]
|
| 5 |
+
*.egg
|
| 6 |
+
*.egg-info/
|
| 7 |
+
dist/
|
| 8 |
+
build/
|
| 9 |
+
venv/
|
| 10 |
+
.env
|
| 11 |
+
resources/
|
| 12 |
+
|
| 13 |
+
# Chroma DB files
|
| 14 |
+
chroma_db/
|
| 15 |
+
*.sqlite3
|
| 16 |
+
index/
|
| 17 |
+
collections/
|
| 18 |
+
|
| 19 |
+
# Environment files
|
| 20 |
+
# .env  (duplicate entry — already ignored under "Python artifacts" above)
|
| 21 |
+
*.log
|
| 22 |
+
|
| 23 |
+
# OS-specific
|
| 24 |
+
.DS_Store
|
| 25 |
+
Thumbs.db
|
| 26 |
+
|
| 27 |
+
# Docker
|
| 28 |
+
*.dockerfile
|
| 29 |
+
*.tar
|
| 30 |
+
# *.log  (duplicate entry — already ignored under "Environment files" above)
|
| 31 |
+
*.pid
|
| 32 |
+
*.db
|
| 33 |
+
# *.sqlite3  (duplicate entry — already ignored under "Chroma DB files" above)
|
| 34 |
+
docker-compose.override.yml
|
Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use official Python base image
|
| 2 |
+
FROM python:3.10-slim-bookworm
|
| 3 |
+
|
| 4 |
+
# Set working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Upgrade system packages to patch vulnerabilities
|
| 8 |
+
RUN apt-get update && apt-get upgrade -y && apt-get clean
|
| 9 |
+
|
| 10 |
+
# Copy requirements and install dependencies
|
| 11 |
+
COPY requirements.txt .
|
| 12 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
+
|
| 14 |
+
# Copy app code
|
| 15 |
+
COPY . .
|
| 16 |
+
|
| 17 |
+
# Expose Gradio default port
|
| 18 |
+
EXPOSE 7860
|
| 19 |
+
|
| 20 |
+
# Run the app
|
| 21 |
+
CMD ["python", "app.py"]
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 KI-IAN
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: LLM Web Scrapper
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: pink
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.46.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
app.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
import firecrawl_client
|
| 4 |
+
import llm_inference_service
|
| 5 |
+
|
| 6 |
+
def parse_model_provider(selection):
    """Split a dropdown selection of the form "<model_name> (<provider>)".

    Args:
        selection: Display string chosen in the model/provider dropdown.

    Returns:
        A ``(model, provider)`` tuple, both stripped of surrounding whitespace.

    Raises:
        ValueError: If *selection* does not end with a " (provider)" suffix.
    """
    # Expected format: "<model_name> (<provider>)".
    # rpartition splits on the LAST " (" so a model name that itself contains
    # " (" still parses correctly (the old split(" (") broke on that case,
    # and raised IndexError instead of ValueError on input like "m(x)").
    if " (" in selection and ")" in selection:
        model, _, provider_part = selection.rpartition(" (")
        provider = provider_part.replace(")", "").strip()
        return model.strip(), provider
    raise ValueError(f"Invalid selection format: {selection}")
|
| 13 |
+
|
| 14 |
+
def llm_response_wrapper(query, scrape_result, model_provider_selection):
    """Run the selected LLM over the scraped content and return its answer.

    Parses the dropdown selection into (model, provider), delegates to
    llm_inference_service.extract_page_info_by_llm, and substitutes a styled
    HTML error notice when the model produced nothing usable.
    """
    model_name, provider_name = parse_model_provider(model_provider_selection)
    answer = llm_inference_service.extract_page_info_by_llm(
        query, scrape_result, model_name, provider_name
    )
    # Treat both a falsy result and a whitespace-only string as "no answer".
    blank = isinstance(answer, str) and answer.strip() == ""
    if not answer or blank:
        return (
            "❌ <span style='color:red;'>No information could be extracted "
            "from the scraped content. Please check your query or try a "
            "different model/provider.</span>"
        )
    return answer
|
| 20 |
+
|
| 21 |
+
# ---------------------------------------------------------------------------
# Gradio UI — layout definition plus event wiring, then a blocking launch.
# NOTE(review): original indentation was lost in this paste; the Column/Row
# nesting below is reconstructed — confirm against the running app.
# ---------------------------------------------------------------------------
with gr.Blocks() as gradio_ui:
    # Header: title plus badge links for the frameworks/services in use.
    gr.HTML("""
    <div style="display: flex; align-items: center; gap: 20px; flex-wrap: wrap; margin-bottom: 20px;">
        <h1 style="margin: 0;"> LLM Web Scraper</h1>
        <div style="display: flex; gap: 10px;">
            <a href="https://github.com/langchain-ai/langchain" target="_blank">
                <img src="https://img.shields.io/badge/LangChain-Framework-blue?logo=langchain" alt="LangChain">
            </a>
            <a href="https://ai.google.dev/gemini-api/docs" target="_blank">
                <img src="https://img.shields.io/badge/Gemini%20API-Google-blue?logo=google" alt="Gemini API">
            </a>
            <a href="https://build.nvidia.com/models" target="_blank">
                <img src="https://img.shields.io/badge/NVIDIA%20NIM-API-green?logo=nvidia" alt="NVIDIA NIM">
            </a>
            <a href="https://firecrawl.dev/" target="_blank">
                <img src="https://img.shields.io/badge/FireCrawl-Web%20Scraper-orange?logo=fire" alt="FireCrawl">
            </a>
            <a href="https://github.com/crawl4ai/crawl4ai" target="_blank">
                <img src="https://img.shields.io/badge/Crawl4AI-Web%20Scraper-blueviolet?logo=github" alt="Crawl4AI">
            </a>
        </div>
    </div>
    """)

    # Link to the full source repository.
    gr.HTML("""
    <div style="display: flex; align-items: center; gap: 10px; margin-bottom: 20px;">
        <span style="font-size: 16px;">📦 <strong>Download the full source code:</strong></span>
        <a href="https://github.com/KI-IAN/llm-web-scrapper" target="_blank">
            <img src="https://img.shields.io/badge/GitHub-View%20Repo-blue?logo=github" alt="GitHub Repo">
        </a>
    </div>
    """)

    with gr.Column():
        # Scrape inputs and trigger.
        url_input = gr.Textbox(label="Enter URL to scrape", placeholder="https://example.com/query?search=cat+food", lines=1)
        query_input = gr.Textbox(label="What information do you want to find?", placeholder="Find product name, price, rating", lines=1)
        scrape_btn = gr.Button("Scrape with FireCrawl")

        # Raw scraper output; also serves as the LLM's context below.
        scrape_result_textbox = gr.Textbox(label="Scrape Result", lines=20, show_copy_button=True)

        label_llm_section = gr.Label("Use LLM to extract information from the scraped content")
        gr.HTML("<hr>")

        with gr.Row():
            # Single dropdown encoding both model and provider; parsed by
            # parse_model_provider() before inference.
            model_provider_dropdown = gr.Dropdown(
                label="Select Model & Provider",
                choices=[
                    "gemini-2.5-flash-lite (google_genai)",
                    "gemini-2.5-pro (google_genai)",
                    "gemini-2.5-flash (google_genai)",
                    "bytedance/seed-oss-36b-instruct (nvidia)",
                    "deepseek-ai/deepseek-v3.1 (nvidia)",
                    "qwen/qwen3-next-80b-a3b-instruct (nvidia)",
                ],
                value="gemini-2.5-flash-lite (google_genai)",
            )

        llm_response_btn = gr.Button("Extracted Info by LLM")

        # LLM output area; Gradio shows its own spinner while processing.
        llm_response = gr.Markdown(
            "\n" * 9,  # 9 newlines + 1 line for empty content = 10 lines minimum
            label="LLM Response",
            show_copy_button=True,
            visible=True,
        )

    # Event wiring.
    scrape_btn.click(
        fn=firecrawl_client.scrape_and_get_markdown_with_firecrawl,
        inputs=url_input,
        outputs=scrape_result_textbox,
    )
    llm_response_btn.click(
        fn=llm_response_wrapper,
        inputs=[query_input, scrape_result_textbox, model_provider_dropdown],
        outputs=llm_response,
    )

# Bind to all interfaces so the app is reachable inside a container.
gradio_ui.launch(server_name="0.0.0.0")
|
config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Configuration module for loading environment variables and secrets.
- Loads environment variables from a `.env` file located in the project root if it exists else loads from Hugging Face's Secrets Tab.
- Provides access to the following secrets:
    - GOOGLE_API_KEY: API key for Gemini LLM.
    - NVIDIA_API_KEY: API key for NVIDIA LLM.
    - FIRE_CRAWL_API_KEY: API key for FireCrawl (read from the FIRECRAWL_API_KEY environment variable).
- Prints warnings if required API keys are not set.
"""
import os

from dotenv import load_dotenv

# Load .env only if running locally.
# FIX: this file lives in the project root (next to .env.example), so the
# .env file is in the SAME directory as this module — the previous
# os.path.join(..., '..', '.env') looked one level above the project root
# and therefore never found a local .env.
env_path = os.path.join(os.path.dirname(__file__), '.env')

if os.path.exists(env_path):
    load_dotenv(dotenv_path=env_path)


# Access secrets (populated by .env locally, or by the host's secret store).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
FIRE_CRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")

if not GOOGLE_API_KEY:
    print("⚠️ Warning: GOOGLE_API_KEY is not set. Gemini LLM API may fail.")

if not NVIDIA_API_KEY:
    print("⚠️ Warning: NVIDIA_API_KEY is not set. NVIDIA LLM API may fail.")

if not FIRE_CRAWL_API_KEY:
    print("⚠️ Warning: FIRECRAWL_API_KEY is not set. FireCrawl API may fail.")
|
crawl4ai_client.py
ADDED
|
File without changes
|
docker-compose.dev.yml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
semantic-search-app:
|
| 5 |
+
build:
|
| 6 |
+
context: .
|
| 7 |
+
dockerfile: Dockerfile
|
| 8 |
+
container_name: llm-web-scrapper
|
| 9 |
+
ports:
|
| 10 |
+
- "12200:7860"
|
| 11 |
+
environment:
|
| 12 |
+
- NVIDIA_API_KEY=${NVIDIA_API_KEY} # Load this key from .env in local/dev environment
|
| 13 |
+
- GOOGLE_API_KEY=${GOOGLE_API_KEY} # Load this key from .env in local/dev environment
|
| 14 |
+
- FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY} # Load this key from .env in local/dev environment
|
| 15 |
+
volumes:
|
| 16 |
+
- .:/app:rw # This is for local development. Docker reads the code from the host machine. Changes on the host are reflected in the container.
|
| 17 |
+
restart: unless-stopped
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
semantic-search-app:
|
| 5 |
+
build:
|
| 6 |
+
context: .
|
| 7 |
+
dockerfile: Dockerfile
|
| 8 |
+
container_name: llm-web-scrapper
|
| 9 |
+
ports:
|
| 10 |
+
- "12200:7860"
|
| 11 |
+
environment:
|
| 12 |
+
- NVIDIA_API_KEY=${NVIDIA_API_KEY} # Load this key from .env or manually add the secret
|
| 13 |
+
- GOOGLE_API_KEY=${GOOGLE_API_KEY} # Load this key from .env or manually add the secret
|
| 14 |
+
- FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY} # Load this key from .env in local/dev environment
|
| 15 |
+
restart: unless-stopped
|
| 16 |
+
|
firecrawl_client.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.document_loaders import FireCrawlLoader
|
| 2 |
+
from langchain_core.documents import Document
|
| 3 |
+
from config import FIRE_CRAWL_API_KEY
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def scrape_with_firecrawl(url: str) -> list[Document]:
    """Scrape a single URL with FireCrawl and return the loaded documents.

    Uses LangChain's FireCrawlLoader in 'scrape' mode with the configured
    FIRE_CRAWL_API_KEY; the lazy iterator is materialized into a list.
    """
    loader = FireCrawlLoader(url=url, api_key=FIRE_CRAWL_API_KEY, mode='scrape')
    # list() drains loader.lazy_load() exactly like the original append loop.
    return list(loader.lazy_load())  # type: ignore
|
| 18 |
+
|
| 19 |
+
def get_markdown_from_documents(docs: list[Document]) -> str:
    """Render scraped documents as one markdown string.

    Each document becomes a "### Page N" section followed by its page_content
    and a horizontal-rule separator; sections are concatenated in order.
    """
    sections = [
        f"### Page {number}\n{doc.page_content}\n\n--------------\n\n"
        for number, doc in enumerate(docs, start=1)
    ]
    return "".join(sections)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def scrape_and_get_markdown_with_firecrawl(url: str) -> str:
    """Scrape *url* with FireCrawl and return the result as markdown.

    Thin composition of scrape_with_firecrawl() and
    get_markdown_from_documents(); used directly as a Gradio callback.
    """
    return get_markdown_from_documents(scrape_with_firecrawl(url))
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
llm_inference_service.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chat_models import init_chat_model
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def extract_page_info_by_llm(user_query: str, scraped_markdown_content: str, model_name: str, model_provider: str) -> str:
    """Ask the selected chat model to extract the requested info from scraped markdown.

    Args:
        user_query: What the user wants extracted (e.g. name, price, rating).
        scraped_markdown_content: Scraper output used as the LLM context.
        model_name: Model identifier passed to init_chat_model.
        model_provider: Provider key (e.g. "google_genai", "nvidia").

    Returns:
        The model's answer text, or a fixed notice when no content was given.
    """
    # Guard clause: with no scraped content there is nothing to extract.
    if not scraped_markdown_content:
        return "No relevant information found to answer your question."

    page_context = scraped_markdown_content

    prompt = f"""
    You are an expert assistant who can extract useful information from the content provided to you. Most of the time,
    the content will be product pages from e-commerce websites. Users will ask you to extract product information such as product name, price, rating, etc.

    Please provide your identity (model name and provider if applicable) at the beginning of your answer.

    Use the following context to answer the user's question. Provide the final answer in a markdown table format if you are asked to extract product information.
    If you can't extract anything useful provide in plain markdown format.

    If user asks for JSON format, please provide the answer in JSON format only.

    If you do not find or know the answer, do not hallucinate, do not try to generate fake answers.
    If no Context is given, simply state "No relevant information found to answer your question."

    Context:
    {page_context}

    Question:
    {user_query}

    Your Identity:

    Answer:

    """

    # A fresh model instance per call lets the UI dropdown switch
    # model/provider without restarting the app.
    chat_model = init_chat_model(model_name, model_provider=model_provider)
    return chat_model.invoke(prompt).content
|
| 40 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# gradio==5.46.1
|
| 2 |
+
# langchain==0.3.27
|
| 3 |
+
# langchain-community==0.3.29
|
| 4 |
+
# chromadb==1.1.0
|
| 5 |
+
# PyMuPDF==1.26.4
|
| 6 |
+
# langchain-google-genai==2.1.12
|
| 7 |
+
# langchain-nvidia-ai-endpoints==0.3.18
|
| 8 |
+
# dotenv==0.9.9
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
gradio
|
| 13 |
+
requests
|
| 14 |
+
# python-dotenv
|
| 15 |
+
dotenv
|
| 16 |
+
|
| 17 |
+
firecrawl-py
|
| 18 |
+
langchain-community
|
| 19 |
+
langchain-google-genai
|
| 20 |
+
langchain-nvidia-ai-endpoints
|
| 21 |
+
# groq
|