import re
import requests
from bs4 import BeautifulSoup
# DuckDuckGo Search
from duckduckgo_search import DDGS
# SerpAPI client for various search engines (Google, Bing, etc.)
from serpapi import GoogleSearch
import nltk
from rake_nltk import Rake
import gradio as gr
from transformers import pipeline
# 1) Keyword extractor using RAKE (needs the NLTK stopwords/punkt data)
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)
rake = Rake()

def extract_keywords(text: str) -> list[str]:
    # Return up to five top-ranked key phrases from the prompt
    rake.extract_keywords_from_text(text)
    return rake.get_ranked_phrases()[:5]
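# Illustrative sketch (not part of the original app): RAKE splits on
# stopwords and ranks the remaining phrases, so for a prompt like
#   extract_keywords("How tall is the Eiffel Tower in Paris?")
# it might return something like ['eiffel tower', 'paris', 'tall'];
# the exact output depends on the NLTK stopword list.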
# 2) Search functions
def ddg_search_links(query: str, num: int = 5) -> list[str]:
    ddgs = DDGS()
    results = ddgs.text(query, max_results=num)
    return [r['href'] for r in results]
def google_search_links(query: str, num: int = 5) -> list[str]:
    # Lazy import; the googlesearch-python package takes num_results and
    # sleep_interval (the older googlesearch package used num/stop/pause)
    from googlesearch import search
    return list(search(query, num_results=num, sleep_interval=2))
def serpapi_search_links(query: str, api_key: str, engine: str = 'bing', num: int = 5) -> list[str]:
    params = {"engine": engine, "q": query, "api_key": api_key}
    client = GoogleSearch(params)
    data = client.get_dict()
    results = data.get('organic_results', [])
    # Skip sponsored hits and entries without a link
    return [r['link'] for r in results if r.get('link') and not r.get('sponsored')][:num]
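# Illustrative usage of the three search helpers (commented out so the
# Space fires no network requests at import time; "YOUR_KEY" is a
# placeholder, not a real credential):
#   ddg_search_links("eiffel tower height")                 # -> list of URLs
#   google_search_links("eiffel tower height", num=3)
#   serpapi_search_links("eiffel tower height", "YOUR_KEY", engine="bing")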
# 3) Fetch page text for summarization
def fetch_text(url: str) -> str:
    try:
        resp = requests.get(url, timeout=3)
        soup = BeautifulSoup(resp.text, 'html.parser')
        texts = soup.find_all(['p', 'h1', 'h2', 'h3'])
        return ' '.join(t.get_text() for t in texts)
    except Exception:
        return ''
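# Design note (illustrative): fetch_text returns '' on any failure, so
# callers can join results without None checks, e.g.
#   '\n'.join(fetch_text(u) for u in urls)  # failed fetches contribute ''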
# 4) Model loader: lightweight HF model
# flan-t5 is an encoder-decoder model, so it needs the text2text-generation
# pipeline; the text-generation pipeline cannot load it
generator = pipeline('text2text-generation', model='google/flan-t5-small')

def model_answer(prompt: str) -> str:
    return generator(prompt, max_length=256, do_sample=False)[0]['generated_text']
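# Illustrative output shape of the text2text-generation pipeline:
#   generator("What is the capital of France?")
#   # -> [{'generated_text': '...'}]
# model_answer unwraps this to the plain string.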
# 5) Detect phrases that forbid web search
FORBID_PATTERNS = [
    "please do not search the internet", "no web search", "do this without the web",
    "without going online", "only your own knowledge", "do not google", "don't search"
]

def search_forbidden(prompt: str) -> bool:
    pl = prompt.lower()
    return any(phrase in pl for phrase in FORBID_PATTERNS)
# 6) Check whether the answer is uncertain
UNCERTAIN_MARKERS = [
    "i don't know", "not in my data", "no information", "i am not sure"
]

def is_uncertain(answer: str) -> bool:
    al = answer.lower()
    return any(marker in al for marker in UNCERTAIN_MARKERS)
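# Illustrative checks against the marker list above:
#   is_uncertain("I don't know the answer to that.")   # -> True
#   is_uncertain("The Eiffel Tower is 330 m tall.")    # -> False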
# 7) Core processing logic
def process(prompt: str, web_enabled: bool, serpapi_key: str) -> str:
    # Extract keywords to build the search query
    keywords = extract_keywords(prompt)
    query = ' '.join(keywords)
    # If the user forbids searching, answer from the model alone
    if search_forbidden(prompt):
        ans = model_answer(prompt)
        if is_uncertain(ans):
            return (
                "Unfortunately I know nothing about this topic from my training. "
                "Since you have forbidden web search, I will try anyway, "
                "but the answer may be inaccurate.\n\n" + ans
            )
        return ans
    # If web search is disabled, just use the model
    if not web_enabled:
        return model_answer(prompt)
    # Web search enabled: try the model first
    ans = model_answer(prompt)
    if not is_uncertain(ans):
        return ans
    # Model is uncertain: perform a multi-engine search
    links = []
    links += google_search_links(query)
    links += ddg_search_links(query)
    if serpapi_key:
        links += serpapi_search_links(query, serpapi_key, engine='bing')
        links += serpapi_search_links(query, serpapi_key, engine='google')
    unique_links = list(dict.fromkeys(links))  # de-duplicate, keep order
    # Fetch the top 3 pages and summarize them; flan-t5-small has a short
    # input window, so prefix a summarization instruction and truncate the
    # text to a rough character budget before generating
    texts = [fetch_text(u) for u in unique_links[:3]]
    combined = 'summarize: ' + '\n'.join(texts)[:2000]
    summary = generator(combined, max_length=256, do_sample=False)[0]['generated_text']
    return summary
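# Decision flow of process(), summarized from the code above:
#   forbidding phrase in prompt -> model only (warning prefixed if uncertain)
#   web search disabled         -> model only
#   web search enabled          -> model first; only on an uncertain answer
#                                  search, fetch top pages, and summarize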
# 8) Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Smart AI with multi-engine web search")
    with gr.Row():
        prompt_input = gr.Textbox(label="Your prompt", lines=3)
        web_switch = gr.Checkbox(label="Enable web search", value=False)
        serp_input = gr.Textbox(label="SerpAPI key (optional, for SerpAPI search)", placeholder="Paste API key")
    btn = gr.Button("Generate answer")
    output = gr.Textbox(label="Answer", lines=10)
    btn.click(
        fn=process,
        inputs=[prompt_input, web_switch, serp_input],
        outputs=output
    )
    gr.Markdown("---")
    gr.Markdown("*Note: the search runs only when web search is enabled and the prompt contains no forbidding phrase.*")

demo.launch()