# Spaces:
# Runtime error
# Runtime error
import logging
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
# DuckDuckGo Search
from duckduckgo_search import DDGS
from rake_nltk import Rake
# SerpAPI client for various search engines (Google, Bing, etc.)
from serpapi import GoogleSearch
from transformers import pipeline
# 1) Keyword extractor using RAKE
rake = Rake()


def extract_keywords(text: str) -> list[str]:
    """Return at most five key phrases that RAKE extracts from *text*.

    The module-level ``rake`` instance is fed the text and then asked for
    its ranked phrases; only the leading five entries are kept.
    """
    top_n = 5
    rake.extract_keywords_from_text(text)
    ranked_phrases = rake.get_ranked_phrases()
    return ranked_phrases[:top_n]
# 2) Search functions
def ddg_search_links(query: str, num: int = 5) -> list[str]:
    """Return result URLs from a DuckDuckGo text search.

    Args:
        query: Search string sent to DuckDuckGo.
        num: Value passed as ``max_results`` to the client.

    Returns:
        The ``href`` value of every result that has one, in result order.
    """
    # The context manager releases the client's HTTP session when done,
    # instead of leaving it open for the lifetime of the process.
    with DDGS() as ddgs:
        results = ddgs.text(query, max_results=num) or []
        # Skip entries without a usable URL rather than raising KeyError.
        return [r["href"] for r in results if r.get("href")]
def google_search_links(query: str, num: int = 5) -> list[str]:
    """Return up to *num* result URLs from a Google search.

    Two different PyPI distributions install a module named
    ``googlesearch`` and their ``search`` signatures are incompatible, so
    the modern keyword set is tried first and the legacy one second.

    Args:
        query: Search string.
        num: Number of results requested.

    Returns:
        The URLs yielded by ``googlesearch.search`` as a list.
    """
    from googlesearch import search

    try:
        # googlesearch-python: num_results / sleep_interval
        hits = search(query, num_results=num, sleep_interval=2)
    except TypeError:
        # Legacy "google" package: num / stop / pause. The TypeError above is
        # raised at call time when the keyword names are not accepted.
        hits = search(query, num=num, stop=num, pause=2)
    return list(hits)
def serpapi_search_links(query: str, api_key: str, engine: str = 'bing', num: int = 5) -> list[str]:
    """Return up to *num* non-sponsored organic result links from SerpAPI.

    Args:
        query: Search string, sent as ``q``.
        api_key: SerpAPI key, sent as ``api_key``.
        engine: SerpAPI engine name, sent as ``engine``.
        num: Maximum number of links returned.

    Returns:
        The ``link`` of each entry in ``organic_results`` that is not flagged
        ``sponsored``, truncated to *num* items.
    """
    request = GoogleSearch({"engine": engine, "q": query, "api_key": api_key})
    organic = request.get_dict().get('organic_results', [])

    links = []
    for entry in organic:
        if entry.get('sponsored'):
            continue
        links.append(entry['link'])
    return links[:num]
# 3) Fetch page text for summarization
def fetch_text(url: str) -> str:
    """Download *url* and return the text of its paragraphs and headings.

    This is deliberately best-effort: any failure while fetching or parsing
    yields an empty string so one bad page cannot abort the whole request.

    Args:
        url: Address of the page to download.

    Returns:
        The text of all ``p``, ``h1``, ``h2`` and ``h3`` elements joined by
        single spaces, or ``''`` if the page could not be retrieved/parsed.
    """
    try:
        resp = requests.get(url, timeout=3)
        # Treat 4xx/5xx responses as failures so that error pages are not
        # fed to the summarizer as if they were content.
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')
        texts = soup.find_all(['p', 'h1', 'h2', 'h3'])
        return ' '.join(t.get_text() for t in texts)
    except Exception as exc:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        logging.getLogger(__name__).warning(
            "Could not fetch %s (%s)", url, type(exc).__name__
        )
        return ''
# 4) Model loader: lightweight HF model
# FLAN-T5 is an encoder-decoder (seq2seq) model, so it must be served through
# the "text2text-generation" task; the "text-generation" task only loads
# causal language models and fails for this checkpoint at import time.
# ``trust_remote_code`` is not passed: the checkpoint uses an architecture
# that ships with transformers, so no repository code needs to be executed.
generator = pipeline('text2text-generation', model='google/flan-t5-small')


def model_answer(prompt: str) -> str:
    """Generate a deterministic answer for *prompt* with the shared pipeline.

    Args:
        prompt: Input text handed to the model.

    Returns:
        The ``generated_text`` of the first (only) returned sequence.
    """
    result = generator(
        prompt,
        max_length=256,
        do_sample=False,
        # Clip over-long inputs (e.g. concatenated web pages) to the
        # tokenizer's maximum length instead of passing them through whole.
        truncation=True,
    )
    return result[0]['generated_text']
# 5) Detect forbidden search phrases
FORBID_PATTERNS = [
    "bitte nicht im internet suchen", "keine websuche", "mach das ohne web",
    "ohne online", "nur dein wissen", "nicht googeln", "such nicht",
]


def search_forbidden(prompt: str) -> bool:
    """Return True if *prompt* contains a phrase that forbids web search.

    Matching is a case-insensitive substring test against
    ``FORBID_PATTERNS``. Runs of whitespace (including line breaks) in the
    prompt are collapsed to single spaces first, so a phrase split across
    lines is still recognised.

    NOTE: because this is a plain substring test, a pattern such as
    "such nicht" also matches inside longer words ("besuch nicht").

    Args:
        prompt: Raw user prompt; ``None`` or empty is treated as "no ban".
    """
    if not prompt:
        return False
    normalized = " ".join(prompt.casefold().split())
    return any(phrase.casefold() in normalized for phrase in FORBID_PATTERNS)
# 6) Check if answer is uncertain
UNCERTAIN_MARKERS = [
    "ich weiß nicht", "nicht in meinen daten", "keine information", "ich bin mir nicht sicher",
]


def is_uncertain(answer: str) -> bool:
    """Return True if *answer* contains one of ``UNCERTAIN_MARKERS``.

    Both sides are compared case-folded, so "ß" and "ss" spellings are
    treated alike ("ICH WEISS NICHT" matches "ich weiß nicht"), and runs of
    whitespace in the answer are collapsed so line breaks do not hide a
    marker.

    Args:
        answer: Model output to inspect; ``None`` or empty counts as certain.
    """
    if not answer:
        return False
    normalized = " ".join(answer.casefold().split())
    return any(marker.casefold() in normalized for marker in UNCERTAIN_MARKERS)
# 7) Core processing logic
def _collect_links(query: str, serpapi_key: str) -> list[str]:
    """Run every available search engine and return de-duplicated links.

    Each engine is best-effort: if one raises, it is logged and skipped so
    the remaining engines can still contribute results.
    """
    engines = [
        ("google", lambda: google_search_links(query)),
        ("duckduckgo", lambda: ddg_search_links(query)),
    ]
    if serpapi_key:
        engines.append(
            ("serpapi-bing", lambda: serpapi_search_links(query, serpapi_key, engine='bing'))
        )
        engines.append(
            ("serpapi-google", lambda: serpapi_search_links(query, serpapi_key, engine='google'))
        )

    links = []
    for name, run_search in engines:
        try:
            links += run_search()
        except Exception as exc:
            # Only the exception type is logged: the message may embed the
            # request parameters, which include the API key.
            logging.getLogger(__name__).warning(
                "Search engine %s failed (%s)", name, type(exc).__name__
            )
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(links))


def process(prompt: str, web_enabled: bool, serpapi_key: str) -> str:
    """Answer *prompt*, falling back to a web search when the model is unsure.

    Flow:
      1. The model answers the prompt directly.
      2. If the prompt forbids web search, that answer is returned, prefixed
         with a warning when it looks uncertain.
      3. If web search is disabled or the answer is not uncertain, the answer
         is returned unchanged.
      4. Otherwise keywords are extracted from the prompt, all search engines
         are queried, the first three unique pages are fetched, and the model
         is run on their combined text.

    Args:
        prompt: User prompt from the UI.
        web_enabled: Whether the user ticked the web-search checkbox.
        serpapi_key: Optional SerpAPI key; SerpAPI is skipped when blank.

    Returns:
        The text shown to the user.
    """
    # Every original branch started by asking the model, so do it once.
    ans = model_answer(prompt)

    # If user forbids search
    if search_forbidden(prompt):
        if is_uncertain(ans):
            return (
                "Ich weiß leider nichts über das Thema aus meinem Training. "
                "Da du Websuche verboten hast, versuche ich es trotzdem, "
                "aber es kann ungenau sein.\n\n" + ans
            )
        return ans

    # Websearch disabled, or the model is confident: use the model answer
    if not web_enabled or not is_uncertain(ans):
        return ans

    # Model uncertain: perform multi-engine search. Keywords are extracted
    # only now because no other path needs them.
    keywords = extract_keywords(prompt)
    # Fall back to the raw prompt if no keywords were found, so the engines
    # are never queried with an empty string.
    query = ' '.join(keywords) or prompt
    unique_links = _collect_links(query, (serpapi_key or '').strip())

    # Fetch top 3 pages and summarize; pages that yielded no text are dropped.
    texts = [text for text in (fetch_text(u) for u in unique_links[:3]) if text.strip()]
    if not texts:
        # Nothing retrieved: the model's own answer beats running it on
        # empty input.
        return ans
    return model_answer('\n'.join(texts))
# 8) Gradio UI
# NOTE(review): the original indentation was lost, so which widgets belong to
# the Row is reconstructed (prompt + checkbox) — confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Intelligente KI mit Multi-Engine-Websuche")
    with gr.Row():
        prompt_input = gr.Textbox(label="Dein Prompt", lines=3)
        web_switch = gr.Checkbox(label="Websuche aktivieren", value=False)
    serp_input = gr.Textbox(
        label="SerpAPI Key (optional für SerpAPI-Suche)",
        placeholder="API Key einfügen",
    )
    btn = gr.Button("Antwort generieren")
    output = gr.Textbox(label="Antwort", lines=10)
    # Wire the button to the core logic; input order matches process()'s
    # parameters (prompt, web_enabled, serpapi_key).
    btn.click(
        fn=process,
        inputs=[prompt_input, web_switch, serp_input],
        outputs=output,
    )
    # The former gr.Spacer() call was removed: Gradio has no such component,
    # so it raised AttributeError at startup. The rule below already provides
    # the visual separation.
    gr.Markdown("---")
    gr.Markdown("*Hinweis: Suche nur bei aktivierter Websuche und nicht bei verbotenen Phrasen.*")

demo.launch()