Spaces:
Paused
Paused
| from fastapi import FastAPI | |
| from typing import List, Dict | |
| import pandas as pd | |
| import requests | |
| from bs4 import BeautifulSoup | |
# Module-level holder for the scraped tools table. It stays None until
# scrape_kali_tools_endpoint() has stored a DataFrame in it.
kali_tools_df = None

# FastAPI application object for this module.
app = FastAPI()
def scrape_kali_tools(
    base_url: str = "https://www.kali.org/tools/",
    timeout: float = 30.0,
) -> pd.DataFrame:
    """
    Scrapes the Kali Linux tools documentation page and returns a structured dataset.

    Parameters:
    - base_url: The URL of the Kali Linux tools documentation.
    - timeout: Seconds to wait for the server before giving up.

    Returns:
    - Pandas DataFrame with the columns "name", "description" and "link", one
      row per tool entry found on the page. The columns are present even when
      no entry was found.

    Raises:
    - requests.RequestException: if the request fails, times out, or the
      server answers with an HTTP error status.
    """
    # Without an explicit timeout, requests can wait forever on a stalled server.
    response = requests.get(base_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    tools = []
    for tool in soup.select(".tools--index__item"):
        title_node = tool.select_one(".tools--index__title")
        if title_node is None:
            # An entry without a title cannot be identified; skip it rather
            # than crash on None.get_text().
            continue
        description_node = tool.select_one(".tools--index__description")
        anchor = tool.find("a", href=True)
        tools.append(
            {
                "name": title_node.get_text(strip=True),
                # Missing description or link degrades to an empty string.
                "description": (
                    description_node.get_text(strip=True)
                    if description_node is not None
                    else ""
                ),
                "link": anchor["href"] if anchor is not None else "",
            }
        )

    # Fix the column set so an empty scrape still yields a frame that can be
    # indexed by "name" / "description".
    return pd.DataFrame(tools, columns=["name", "description", "link"])
def scrape_kali_tools_endpoint():
    """
    Runs the scraper and keeps the resulting dataset in the module-level
    `kali_tools_df` variable.

    Returns:
    - A dict whose "message" entry reports how many tools were scraped.
    """
    global kali_tools_df
    scraped = scrape_kali_tools()
    kali_tools_df = scraped
    tool_count = len(scraped)
    return {"message": f"Scraped {tool_count} tools from Kali Linux documentation."}
def get_kali_tools(start: int = 0, limit: int = 10) -> List[Dict]:
    """
    Fetches a chunk of the Kali tools dataset.

    Parameters:
    - start: Starting index of the tools to fetch. Negative values are treated as 0.
    - limit: Number of tools to return. Negative values are treated as 0.

    Returns:
    - A list with one dict per row of the requested chunk (empty when the
      chunk lies past the end of the dataset), or a dict with an "error" key
      when the dataset has not been scraped yet.
    """
    if kali_tools_df is None:
        return {"error": "Dataset not yet scraped. Call /scrape_kali_tools first."}
    # Clamp both values: a negative bound would make iloc count from the end
    # of the frame and return an unrelated (or unexpectedly empty) slice.
    first = max(start, 0)
    count = max(limit, 0)
    return kali_tools_df.iloc[first:first + count].to_dict(orient="records")
def search_kali_tools(keyword: str) -> List[Dict]:
    """
    Searches the Kali tools dataset for a specific keyword.

    The keyword is matched literally (not as a regular expression) and
    case-insensitively against the "name" and "description" columns.

    Parameters:
    - keyword: Keyword to search in tool names or descriptions.

    Returns:
    - A list of tools matching the keyword (empty when nothing matches), or a
      dict with an "error" key when the dataset has not been scraped yet.
    """
    if kali_tools_df is None:
        return {"error": "Dataset not yet scraped. Call /scrape_kali_tools first."}
    if kali_tools_df.empty:
        # Nothing to search; this also covers a frame built from zero rows
        # that has no "name" / "description" columns to index.
        return []
    # regex=False: keywords such as "c++" or "(" are plain text, not patterns,
    # so they neither raise re.error nor run caller-supplied regexes.
    # na=False: rows with a missing value count as non-matches instead of
    # putting NaN into the boolean mask.
    name_hits = kali_tools_df["name"].str.contains(
        keyword, case=False, regex=False, na=False
    )
    description_hits = kali_tools_df["description"].str.contains(
        keyword, case=False, regex=False, na=False
    )
    return kali_tools_df[name_hits | description_hits].to_dict(orient="records")