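"""RAG chatbot for the lecture "Financial Markets" (Universität Duisburg-Essen).

Pipeline: the main*.txt lecture files are split into sentence-based chunks,
embedded with a SentenceTransformer model, and indexed with scikit-learn's
NearestNeighbors. At query time the most similar chunks are inserted into a
German-language prompt and sent to the Hugging Face Inference API; answers
are served through a Gradio ChatInterface.

Requires the HF_TOKEN environment variable (e.g. set as a Space secret).
"""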
import gradio as gr
import os
import re
import requests
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer

# --- CONFIGURATION ---
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
HF_MODEL = "cutycat2000x/MeowGPT-3"  # Change this if needed
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

FILES = ["main1.txt", "main2.txt", "main3.txt", "main4.txt", "main5.txt", "main6.txt"]
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

EMBEDDING_CACHE_FILE = "embeddings.npy"
CHUNKS_CACHE_FILE = "chunks.npy"

# --- FUNCTIONS ---

def test_model_connection():
    try:
        print("🔍 Testing Hugging Face model availability...")
        test_response = requests.get(HF_API_URL, headers=headers, timeout=10)
        print("Status Code:", test_response.status_code)
        print("Response JSON:", test_response.json())
    except Exception as e:
        print("❌ Connection Test Failed:", e)

def load_text_files(file_list):
    knowledge = ""
    for file_name in file_list:
        try:
            with open(file_name, "r", encoding="utf-8") as f:
                knowledge += "\n" + f.read()
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
    return knowledge.strip()

def chunk_text(text, max_chunk_length=500):
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_chunk_length:
            current_chunk += " " + sentence
        else:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

def embed_texts(texts):
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)

def save_cache(embeddings, chunks):
    np.save(EMBEDDING_CACHE_FILE, embeddings)
    np.save(CHUNKS_CACHE_FILE, np.array(chunks))

def load_cache():
    if os.path.exists(EMBEDDING_CACHE_FILE) and os.path.exists(CHUNKS_CACHE_FILE):
        embeddings = np.load(EMBEDDING_CACHE_FILE, allow_pickle=True)
        chunks = np.load(CHUNKS_CACHE_FILE, allow_pickle=True).tolist()
        print("✅ Loaded cached embeddings and chunks.")
        return embeddings, chunks
    return None, None
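
# Note: the cache is never invalidated automatically. After editing the
# main*.txt files, delete embeddings.npy and chunks.npy so the chunks are
# re-embedded on the next start.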

def retrieve_chunks(query, top_k=5):
    query_embedding = embed_texts([query])
    # Never request more neighbours than there are chunks in the index.
    n_neighbors = min(top_k, len(chunks))
    distances, indices = nn_model.kneighbors(query_embedding, n_neighbors=n_neighbors)
    return [chunks[i] for i in indices[0]]

def build_prompt(question):
    relevant_chunks = retrieve_chunks(question)
    context = "\n".join(relevant_chunks)

    system_instruction = """You are an AI-supported financial expert. Your role is to answer questions strictly within the context of the university lecture "Financial Markets" (Universität Duisburg-Essen).
Important instructions:
1. Base your answers primarily on the provided lecture excerpts ("lecture_slides").
2. If an answer is not directly covered by the lecture content, you may elaborate — but **only if you are absolutely certain**. Avoid making up information.
3. If you are unsure, reply politely: 
   "Entschuldigung. Leider kenne ich die Antwort auf diese Frage nicht."
4. If a formula is relevant, show the **exact formula** and explain it in **simple terms**.
5. Do not give vague or speculative answers — it's better to skip a question than guess.
6. **Never generate your own questions. Only respond to the given question.**
7. **Always respond in German.**
8. Make your answers clear, fact-based, and well-structured.
"""

    prompt = f"""{system_instruction}
Vorlesungsinhalte:
{context}
--- Ende der Vorlesungsinhalte ---
Frage des Nutzers (bitte nur diese beantworten): {question}
Antwort:"""
    return prompt

def respond(message, history):
    try:
        prompt = build_prompt(message)
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": 0.2,
                "max_new_tokens": 400,
                "stop": ["Frage:", "Question:", "User:", "Frage des Nutzers"]
            },
        }

        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        output = response.json()

        if isinstance(output, list) and output and "generated_text" in output[0]:
            generated_text = output[0]["generated_text"]
            answer = generated_text[len(prompt):].strip()
        else:
            print("❗️HF API returned unexpected format:", output)
            answer = "❌ Modell hat keine gültige Antwort geliefert. Bitte später erneut versuchen."

    except Exception as e:
        print("API Error:", e)
        try:
            print("Raw HF response:", response.text)
        except Exception:
            # `response` does not exist if the request itself failed
            pass
        answer = "❌ Error contacting the model. Please check your token, timeout, or model availability."

    # gr.ChatInterface manages the chat history itself; returning the answer
    # is enough for it to be shown as the assistant turn.
    return answer

# --- INIT SECTION ---

print("🔄 Initializing embedding model...")
model = SentenceTransformer(EMBEDDING_MODEL)

chunk_embeddings, chunks = load_cache()

if chunk_embeddings is None or chunks is None:
    print("🛠 No cache found. Processing text...")
    knowledge_base = load_text_files(FILES)
    chunks = chunk_text(knowledge_base)
    chunk_embeddings = embed_texts(chunks)
    save_cache(chunk_embeddings, chunks)
    print("✅ Embeddings and chunks cached.")

nn_model = NearestNeighbors(metric="cosine")
nn_model.fit(chunk_embeddings)
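# Because the chunk embeddings are created with normalize_embeddings=True,
# cosine distance here is equivalent to ranking by dot-product similarity.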

# --- GRADIO INTERFACE ---

demo = gr.ChatInterface(
    fn=respond,
    title="📚 RAG Chatbot Finanzmärkte",
    description="Stelle Fragen basierend auf den hochgeladenen Vorlesungstexten.",
    chatbot=gr.Chatbot(type="messages"),
)
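# With type="messages", respond() receives `history` as a list of
# {"role": ..., "content": ...} dicts rather than (user, bot) tuples.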

if __name__ == "__main__":
    test_model_connection()
    demo.launch(debug=True)