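"""RAG chatbot for the "Finanzmärkte" lecture (Universität Duisburg-Essen).

Loads the lecture text files, splits them into chunks, embeds them with a
sentence-transformers model, retrieves the most similar chunks for each
question via nearest-neighbour search, and asks a Hugging Face Inference API
model to answer in German. Served through a Gradio ChatInterface.
"""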
import gradio as gr
import os
import re
import requests
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer
# --- CONFIGURATION ---
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
HF_MODEL = "cutycat2000x/MeowGPT-3" # Change this if needed
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
FILES = ["main1.txt", "main2.txt", "main3.txt", "main4.txt", "main5.txt", "main6.txt"]
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_CACHE_FILE = "embeddings.npy"
CHUNKS_CACHE_FILE = "chunks.npy"
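# Note: the cache is reused on every start; delete both .npy files to re-embed
# after the main*.txt sources change.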
# --- FUNCTIONS ---
def test_model_connection():
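    """Quick health check: ping the Hugging Face Inference API for the configured model."""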
    try:
        print("🔍 Testing Hugging Face model availability...")
        test_response = requests.get(HF_API_URL, headers=headers, timeout=10)
        print("Status Code:", test_response.status_code)
        print("Response JSON:", test_response.json())
    except Exception as e:
        print("❌ Connection Test Failed:", e)
def load_text_files(file_list):
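    """Concatenate the contents of all lecture text files into one knowledge string."""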
    knowledge = ""
    for file_name in file_list:
        try:
            with open(file_name, "r", encoding="utf-8") as f:
                knowledge += "\n" + f.read()
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
    return knowledge.strip()
def chunk_text(text, max_chunk_length=500):
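    """Split text into sentence-aligned chunks of roughly max_chunk_length characters."""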
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_chunk_length:
            current_chunk += " " + sentence
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks
def embed_texts(texts):
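    """Encode a list of texts into L2-normalized embedding vectors."""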
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
def save_cache(embeddings, chunks):
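    """Persist embeddings and chunks to disk so they are not recomputed on every start."""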
    np.save(EMBEDDING_CACHE_FILE, embeddings)
    np.save(CHUNKS_CACHE_FILE, np.array(chunks))
def load_cache():
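    """Load cached embeddings and chunks if both cache files exist; otherwise return (None, None)."""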
    if os.path.exists(EMBEDDING_CACHE_FILE) and os.path.exists(CHUNKS_CACHE_FILE):
        embeddings = np.load(EMBEDDING_CACHE_FILE, allow_pickle=True)
        chunks = np.load(CHUNKS_CACHE_FILE, allow_pickle=True).tolist()
        print("✅ Loaded cached embeddings and chunks.")
        return embeddings, chunks
    return None, None
def retrieve_chunks(query, top_k=5):
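    """Return the chunks most similar to the query via the cosine nearest-neighbour index."""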
    query_embedding = embed_texts([query])
    # Never request more neighbours than there are chunks, otherwise kneighbors() raises.
    n_neighbors = min(top_k, len(chunks))
    distances, indices = nn_model.kneighbors(query_embedding, n_neighbors=n_neighbors)
    return [chunks[i] for i in indices[0]]
def build_prompt(question):
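    """Assemble the full prompt: system instructions, retrieved lecture context, and the user question."""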
    relevant_chunks = retrieve_chunks(question)
    context = "\n".join(relevant_chunks)
    system_instruction = """You are an AI-supported financial expert. Your role is to answer questions strictly within the context of the university lecture "Financial Markets" (Universität Duisburg-Essen).
Important instructions:
1. Base your answers primarily on the provided lecture excerpts ("lecture_slides").
2. If an answer is not directly covered by the lecture content, you may elaborate — but **only if you are absolutely certain**. Avoid making up information.
3. If you are unsure, reply politely:
"Entschuldigung. Leider kenne ich die Antwort auf diese Frage nicht."
4. If a formula is relevant, show the **exact formula** and explain it in **simple terms**.
5. Do not give vague or speculative answers — it's better to skip a question than guess.
6. **Never generate your own questions. Only respond to the given question.**
7. **Always respond in German.**
8. Make your answers clear, fact-based, and well-structured.
"""
    prompt = f"""{system_instruction}
Vorlesungsinhalte:
{context}
--- Ende der Vorlesungsinhalte ---
Frage des Nutzers (bitte nur diese beantworten): {question}
Antwort:"""
    return prompt
def respond(message, history):
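    """Gradio chat callback: build the RAG prompt for the message and query the HF Inference API."""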
    try:
        prompt = build_prompt(message)
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": 0.2,
                "max_new_tokens": 400,
                "stop": ["Frage:", "Question:", "User:", "Frage des Nutzers"]
            },
        }
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        output = response.json()
        if isinstance(output, list) and "generated_text" in output[0]:
            # The API echoes the prompt before the completion, so strip the prompt prefix.
            generated_text = output[0]["generated_text"]
            answer = generated_text[len(prompt):].strip()
        else:
            print("❗️HF API returned unexpected format:", output)
            answer = "❌ Modell hat keine gültige Antwort geliefert. Bitte später erneut versuchen."
    except Exception as e:
        print("API Error:", e)
        try:
            print("Raw HF response:", response.text)
        except Exception:
            pass
        answer = "❌ Error contacting the model. Please check your token, timeout, or model availability."
    if history is None:
        history = []
    history.append({"role": "assistant", "content": answer})
    return answer
# --- INIT SECTION ---
print("🔄 Initializing embedding model...")
model = SentenceTransformer(EMBEDDING_MODEL)
chunk_embeddings, chunks = load_cache()
if chunk_embeddings is None or chunks is None:
print("🛠 No cache found. Processing text...")
knowledge_base = load_text_files(FILES)
chunks = chunk_text(knowledge_base)
chunk_embeddings = embed_texts(chunks)
save_cache(chunk_embeddings, chunks)
print("✅ Embeddings and chunks cached.")
nn_model = NearestNeighbors(metric="cosine")
nn_model.fit(chunk_embeddings)
# --- GRADIO INTERFACE ---
demo = gr.ChatInterface(
    fn=respond,
    title="📚 RAG Chatbot Finanzmärkte",
    description="Stelle Fragen basierend auf den hochgeladenen Vorlesungstexten.",
    chatbot=gr.Chatbot(type="messages"),
)
if __name__ == "__main__":
    test_model_connection()
    demo.launch(debug=True)