Upload 8 files

Browse files

Files changed (8) hide show

.env +6 -0
Q&A_cleaned.json +158 -0
app.py +66 -0
auto_tester.py +79 -0
pages/admin_dashboard.py +93 -0
qa_loader.py +38 -0
rag_chain.py +121 -0
requirements.txt +12 -0

.env ADDED Viewed

	@@ -0,0 +1,6 @@

+# ✅ API keys are written here directly (for now) — temporary only; never commit real secrets
+OPENAI_API_KEY = "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"
+TAVILY_API_KEY = "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"
+ADMIN_USERNAME=superadmin
+ADMIN_PASSWORD=admin123

Q&A_cleaned.json ADDED Viewed

	@@ -0,0 +1,158 @@

+[
+    {
+        "QUESTION": "Where can I find the timetable?",
+        "ANSWER": "The timetable is available in the USOSweb (University Study-Oriented System).\u00a0The link to the USOSweb is available on the University\u2019s website, in the Intranet tab. Students can see their individual timetable after logging the USOS system."
+    },
+    {
+        "QUESTION": "Where can I find information about the exam session?",
+        "ANSWER": "Exam session dates are given in the organisation of the academic year. Each lecturer is obliged to inform students about the date of the exam / final test."
+    },
+    {
+        "QUESTION": "How can I collect my student ID card?",
+        "ANSWER": "Student ID cards for students of the 1st semester can be collected at the reception of the University, the remaining cards should be collected at the Dean\u2019s Office."
+    },
+    {
+        "QUESTION": "When will the timetable be available?",
+        "ANSWER": "According to the Regulations, the timetable is published two weeks before the beginning of courses."
+    },
+    {
+        "QUESTION": "How can I find a lecturer?",
+        "ANSWER": "All information regarding courses is provided in the timetable on the USOSweb. Information on the lecturer (name and surname) is given in the description of each course and that is where you can check his/her timetable or send him/her an e-mail."
+    },
+    {
+        "QUESTION": "I need help with applying for a Residence Card. Can anyone help me?",
+        "ANSWER": "Yes, you may come to room 307. They can help you with documents."
+    },
+    {
+        "QUESTION": "I don\u2019t have a language certificate, can I pass an English exam at the University?",
+        "ANSWER": "Yes, you may pass an exam at the university. You may visit room 308, and register there for the exam."
+    },
+    {
+        "QUESTION": "I don\u2019t have a medical certificate, can I do that at the university?",
+        "ANSWER": "Yes,\u00a0 if you do not have a medical certificate, you may get it from our University doctor (the cost is 100 PLN)."
+    },
+    {
+        "QUESTION": "What is a medical certificate?",
+        "ANSWER": "It is a certificate from your doctor stating that you don\u2019t have any restrictions to conducting your studies."
+    },
+    {
+        "QUESTION": "I need to submit my original documents. Where do I need to go?",
+        "ANSWER": "After arrival, you have 2 days to submit your original documents to the Recruitment office, room 28."
+    },
+    {
+        "QUESTION": "Can someone help to show me around Warsaw?",
+        "ANSWER": "Yes, we have a Mentoring program,  so you can apply and get assigned a Mentor, at the following e-mail address "
+    },
+    {
+        "QUESTION": "I am a new student coming from abroad. I need someone to pick me up from the airport.",
+        "ANSWER": "You can apply for the Mentoring program, and you will be assigned a person who will help you get comfortable in Poland. You can apply here:\u00a0https://studentactivity.vistula.edu.pl/"
+    },
+    {
+        "QUESTION": "I want accommodation, where can I book it? Do you have a dormitory?",
+        "ANSWER": "For full information about accommodation, please contact accommodation@vistula.edu.pl. Vistula offers private dormitories, as well as our own dormitory."
+    },
+    {
+        "QUESTION": "I want to apply for Erasmus",
+        "ANSWER": "Please, find detailed information about Erasmus here \u2013\u00a0https://www.vistula.edu.pl/en/students/erasmus or, alternatively, you may visit room 123."
+    },
+    {
+        "QUESTION": "Do I need to pay the registration fee first?",
+        "ANSWER": "No, we will send you the invoice with the tuition fee, registration fee, and student card fee."
+    },
+    {
+        "QUESTION": "I applied, how can I pay?",
+        "ANSWER": "Don\u2019t worry, the university will send your invoice after reviewing your documents. The invoice can be found in the application section: Payments."
+    },
+    {
+        "QUESTION": "How to reset your e-mail password?",
+        "ANSWER": "Write to ict-help@vistula.edu.pl. Unless you want to change them but know the old ones, then you can do it yourself on the Vistula mail site."
+    },
+    {
+        "QUESTION": "How to log into the university system?",
+        "ANSWER": "After submitting the documents to the Recruitment Department and admission to studies, within a few days of registration, you will receive logins to individual university systems along with detailed instructions on how to use them."
+    },
+    {
+        "QUESTION": "My account is inactive, what should I do?",
+        "ANSWER": "If you have already received a login and password and you cannot log in, please contact our IT department. New students who have not received an email with information on how to log in, should contact the Admissions Department. Students who continue their studies and their account is no longer active after the semester, should contact their Field Supervisor at the Dean\u2019s Office."
+    },
+    {
+        "QUESTION": "How to log into Vistula systems?",
+        "ANSWER": "You can log into all systems with one account \u2013 an e-mail account in the @stu.vistula.edu.pl domain."
+    },
+    {
+        "QUESTION": "I do not have an ID, or my ID is incorrect. What should I do?",
+        "ANSWER": "Please send an e-mail to the following address: ict-help@vistula.edu.pl"
+    },
+    {
+        "QUESTION": "Where are my grades?",
+        "ANSWER": "In the USOSweb system."
+    },
+    {
+        "QUESTION": "Is there any department to help me prepare for job interviews?",
+        "ANSWER": "Yes. The university has a Career Office that can help you prepare for job interviews. Our Career Advisor will be happy to give you tips on preparing for job interviews."
+    },
+    {
+        "QUESTION": "Is there any department to help me prepare an effective CV?",
+        "ANSWER": "Of course. CV consultations are available in the Careers and Internship Department. Each student can count on the help of an experienced career counsellor."
+    },
+    {
+        "QUESTION": "Can the University help me with finding a future job?",
+        "ANSWER": "Yes, we do have a Careers Department. They can help you to draw up a CV, and find an internship or job."
+    },
+    {
+        "QUESTION": "Where do I have to submit documents for the internship?",
+        "ANSWER": "Documents of completed internships students of VSH submit to the Career and Internship Department (1st floor, room 116). On the other hand, VU students send scanned documents to the e-mail address of their tutor."
+    },
+    {
+        "QUESTION": "Is Poland cold?",
+        "ANSWER": "Poland experiences cold winters, especially in the northern and eastern regions, with temperatures often dropping below freezing. Summers are milder and more comfortable."
+    },
+    {
+        "QUESTION": "How can I buy public transportation tickets?",
+        "ANSWER": "You can purchase tickets from ticket vending machines located at some bus stops and metro stations."
+    },
+    {
+        "QUESTION": "What types of public transportation are used in Warsaw?",
+        "ANSWER": "In Warsaw, public transportation includes buses, trams, the metro, and trains."
+    },
+    {
+        "QUESTION": "Are there single-use public transportation tickets?",
+        "ANSWER": "There are two types of single-use public transportation tickets. One covers a 20-minute journey, and the other covers a 75-minute journey."
+    },
+    {
+        "QUESTION": "How can I travel from the airport to the city center?",
+        "ANSWER": "You can travel from the airport to the city center using public transportation such as buses and trains. Additionally, taxis are available at the airport."
+    },
+    {
+        "QUESTION": "How can I use a taxi?",
+        "ANSWER": "Taking a taxi in Warsaw is easy. You can use applications like Uber, Bolt, Freenow to call a taxi at your desired location and time."
+    },
+    {
+        "QUESTION": "What\u2019s the best way to get to the university?",
+        "ANSWER": "The most convenient way to reach the university is by using the metro and bus."
+    },
+    {
+        "QUESTION": "Is there a metro station near the university?",
+        "ANSWER": "There is a Stok\u0142osy metro station on the M1 route, just 3 minutes away from the university."
+    },
+    {
+        "QUESTION": "How can I travel from the university to the city center?",
+        "ANSWER": "You can reach the city center from the university in a short time, approximately 20 minutes, by using the metro."
+    },
+    {
+        "QUESTION": "Is there a discount for students in public transportation?",
+        "ANSWER": "In Poland, students with a valid student card get a 50% discount on public transport fares."
+    },
+    {
+        "QUESTION": "Where can I get a SIM card?",
+        "ANSWER": "In Poland, there are several mobile operators. You can visit the stores of major brands such as Play, Orange, and T-Mobile to obtain a SIM card."
+    },
+    {
+        "QUESTION": "How can I open a bank account?",
+        "ANSWER": "Each bank in Poland has its own customer policies, so the required documents to open an account may vary. Visit the bank you're interested in and submit the necessary documents to easily open an account."
+    },
+    {
+        "QUESTION": "Can I have accounts in currencies other than PLN in my bank account?",
+        "ANSWER": "While it varies by bank, you can typically open accounts in currencies other than PLN, such as euros and dollars."
+    }
+]

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import os
+import time
+import streamlit as st
+from qa_loader import load_qa_and_create_vectorstore
+from rag_chain import generate_response
+from dotenv import load_dotenv
+# 🔹 Load environment variables
+load_dotenv()
+# 🔹 Streamlit Page Configuration
+st.set_page_config(page_title="Vistula University AI Assistant", layout="centered")
+# 🔹 Title and Description
+st.title("📚 Vistula University AI Assistant")
+st.write("🚀 Ask me anything about Vistula University!")
+# 🔹 Retrieve Data (Cached for Performance)
+@st.cache_resource
+def get_retriever():
+    return load_qa_and_create_vectorstore()
+retriever = get_retriever()
+if isinstance(retriever, tuple):
+    retriever = retriever[0]
+# 🔹 Start or Load Chat History
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# 🔹 Display Chat History
+st.write("### 🗂️ Chat History")
+for entry in st.session_state.chat_history:
+    with st.chat_message("user"):
+        st.write(entry["question"])
+    with st.chat_message("assistant"):
+        st.write(entry["answer"])
+# 🔹 User Input
+query = st.chat_input("Ask your question about Vistula University!")
+# 🔹 Process When User Submits a Question
+if query:
+    with st.spinner("🤖 Thinking..."):
+        response = generate_response(retriever, query)
+    # 🔹 Add to Chat History
+    st.session_state.chat_history.append({
+        "question": query,
+        "answer": response
+    })
+    # 🔹 Display User Question and AI Response
+    with st.chat_message("user"):
+        st.write(query)
+    with st.chat_message("assistant"):
+        placeholder = st.empty()
+        current_text = ""
+        # Typing Effect
+        for word in response.split():
+            current_text += word + " "
+            placeholder.write(current_text)
+            time.sleep(0.05)

auto_tester.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import json
+import os
+import datetime
+from qa_loader import load_qa_and_create_vectorstore
+from rag_chain import generate_response
+from rapidfuzz import fuzz  # Benzerlik oranı hesaplamak için
+# Log klasörünü hazırla
+os.makedirs("logs", exist_ok=True)
+# Zaman damgalı log dosyası
+timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+log_file = f"logs/auto_test_results_{timestamp}.txt"
+# Vektör veritabanını yükle
+retriever = load_qa_and_create_vectorstore()
+# Q&A dosyasını oku
+with open("Q&A_cleaned.json", "r", encoding="utf-8") as f:
+    qa_data = json.load(f)
+# Performans istatistikleri
+total_questions = len(qa_data)
+correct_answers = 0
+incorrect_answers = 0
+# Minimum kabul edilebilir benzerlik oranı
+SIMILARITY_THRESHOLD = 60  # %60 eşleşme
+# Log dosyasını aç ve başlık ekle
+with open(log_file, "w", encoding="utf-8") as log:
+    log.write(f"Auto Test Run - {timestamp}\n")
+    log.write("=" * 80 + "\n")
+    for idx, item in enumerate(qa_data, start=1):
+        question = item['QUESTION']
+        expected_answer = item['ANSWER']
+        print(f"{idx}/{total_questions} Asking: {question}")
+        ai_response = generate_response(retriever, question)
+        # Benzerlik oranını hesapla
+        similarity_score = fuzz.ratio(expected_answer.lower(), ai_response.lower())
+        if similarity_score >= SIMILARITY_THRESHOLD:
+            result = f"✅ Correct (Similarity: {similarity_score:.2f}%)"
+            correct_answers += 1
+        else:
+            result = f"❌ Incorrect (Similarity: {similarity_score:.2f}%)"
+            incorrect_answers += 1
+        # Log'a yaz
+        log.write(f"Question {idx}/{total_questions}:\n")
+        log.write(f"Q: {question}\n")
+        log.write(f"Expected Answer: {expected_answer}\n")
+        log.write(f"AI Response: {ai_response}\n")
+        log.write(f"Similarity: {similarity_score:.2f}%\n")
+        log.write(f"Result: {result}\n")
+        log.write("-" * 80 + "\n")
+        print(f"🔎 {result} - Logged")
+    # Test sonrası performans özeti
+    accuracy = (correct_answers / total_questions) * 100
+    log.write("\nTEST SUMMARY\n")
+    log.write("=" * 80 + "\n")
+    log.write(f"Total Questions: {total_questions}\n")
+    log.write(f"Correct Answers: {correct_answers}\n")
+    log.write(f"Incorrect Answers: {incorrect_answers}\n")
+    log.write(f"Accuracy: {accuracy:.2f}%\n")
+    log.write("=" * 80 + "\n")
+# Sonuç özeti terminale yazdır
+print("\n🔔 TEST COMPLETED")
+print(f"✅ Correct: {correct_answers}")
+print(f"❌ Incorrect: {incorrect_answers}")
+print(f"📊 Accuracy: {accuracy:.2f}%")
+print(f"📂 Detailed log saved to: {log_file}")

pages/admin_dashboard.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import streamlit as st
+import pandas as pd
+import json
+import os
+from qa_loader import load_qa_and_create_vectorstore
+from langchain_chroma import Chroma
+# 🔹 Page Configuration
+st.set_page_config(page_title="Admin Dashboard", layout="wide")
+# 📂 Load available log files
+log_dir = "logs"
+log_files = [f for f in os.listdir(log_dir) if f.startswith("auto_test_results")]
+# 🔹 Read the latest log file
+if log_files:
+    latest_log = sorted(log_files)[-1]
+    log_path = os.path.join(log_dir, latest_log)
+    with open(log_path, "r", encoding="utf-8") as f:
+        log_data = f.readlines()
+else:
+    log_data = []
+# 📊 **Log Analysis**
+st.title("📊 Admin Dashboard")
+st.subheader("🔍 AI Model Log Analysis")
+if log_data:
+    correct_count = sum(1 for line in log_data if "✅ Correct" in line)
+    incorrect_count = sum(1 for line in log_data if "❌ Incorrect" in line)
+    total_count = correct_count + incorrect_count
+    accuracy = (correct_count / total_count) * 100 if total_count > 0 else 0
+    st.metric("✅ Correct Answers", correct_count)
+    st.metric("❌ Incorrect Answers", incorrect_count)
+    st.metric("🎯 Accuracy", f"{accuracy:.2f}%")
+    # **Most Frequently Asked Questions**
+    question_lines = [line for line in log_data if line.startswith("Q:")]
+    question_counts = pd.Series(question_lines).value_counts().head(10)
+    st.subheader("📌 Most Frequently Asked Questions")
+    st.write(question_counts)
+else:
+    st.warning("⚠️ No log records found. Please run the automated tests.")
+# 📚 **Q&A Data Update**
+st.subheader("📌 Update Q&A Database")
+# Load existing Q&A data
+qa_file = "Q&A_cleaned.json"
+if os.path.exists(qa_file):
+    with open(qa_file, "r", encoding="utf-8") as f:
+        qa_data = json.load(f)
+else:
+    qa_data = []
+# Add a new question
+new_question = st.text_input("📝 New Question:")
+new_answer = st.text_area("📌 Answer:")
+if st.button("💾 Save"):
+    if new_question and new_answer:
+        qa_data.append({"QUESTION": new_question, "ANSWER": new_answer})
+        with open(qa_file, "w", encoding="utf-8") as f:
+            json.dump(qa_data, f, indent=4)
+        st.success("✅ New question added!")
+    else:
+        st.warning("⚠️ Please enter both a question and an answer.")
+# **Edit Incorrect Answers**
+st.subheader("🔄 Edit Incorrect Answers")
+if qa_data:
+    question_list = [q["QUESTION"] for q in qa_data]
+    selected_question = st.selectbox("🔎 Select Question to Edit:", question_list)
+    selected_item = next((q for q in qa_data if q["QUESTION"] == selected_question), None)
+    updated_answer = st.text_area("✏️ Updated Answer:", selected_item["ANSWER"] if selected_item else "")
+    if st.button("📝 Update"):
+        if selected_item:
+            selected_item["ANSWER"] = updated_answer
+            with open(qa_file, "w", encoding="utf-8") as f:
+                json.dump(qa_data, f, indent=4)
+            st.success("✅ Answer updated!")
+# **Update Vector Database**
+st.subheader("🔄 Update Vector Database")
+if st.button("📥 Re-train"):
+    retriever = load_qa_and_create_vectorstore()
+    st.success("✅ Vector database successfully updated!")

qa_loader.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import json
+import os
+from langchain.schema import Document
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_chroma import Chroma
+from langchain_huggingface import HuggingFaceEmbeddings
+# 🔹 Daha güçlü bir embedding modeli kullanarak eşleşmeleri iyileştiriyoruz
+embedding_model = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-mpnet-base-v2",
+    encode_kwargs={"normalize_embeddings": True}
+)
+# 🔹 Q&A verisini yükleyip vektör veritabanı oluştur
+def load_qa_and_create_vectorstore():
+    with open("Q&A_cleaned.json", "r", encoding="utf-8") as f:
+        qa_data = json.load(f)
+    documents = [
+        Document(
+            page_content=f"Question: {item['QUESTION']}\nAnswer: {item['ANSWER']}",
+            metadata={}
+        )
+        for item in qa_data
+    ]
+    text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=200)
+    split_docs = text_splitter.split_documents(documents)
+    persist_directory = "./vistula_chroma"
+    # 🔹 Eğer eski veritabanı varsa, yeni veriyle yeniden oluştur
+    if os.path.exists(persist_directory):
+        os.system("rm -rf vistula_chroma")  # Eski vektör veritabanını siliyoruz
+    vectordb = Chroma.from_documents(split_docs, embedding_model, persist_directory=persist_directory)
+    return vectordb.as_retriever()

rag_chain.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import os
+from langchain_openai import ChatOpenAI
+from langchain_core.runnables import RunnableLambda
+from langchain_core.output_parsers import StrOutputParser
+from tavily import TavilyClient
+from dotenv import load_dotenv
+import datetime
+# 🔹 Load environment variables from .env file
+load_dotenv()
+# 🔹 Retrieve API keys from environment variables
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+if not OPENAI_API_KEY or not TAVILY_API_KEY:
+    raise ValueError("❌ API keys are missing! Please check your .env file.")
+# 🔹 Initialize OpenAI and Tavily clients
+tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
+llm = ChatOpenAI(
+    model_name="llama3-8b-8192",
+    temperature=0,
+    streaming=False,  # Streaming is controlled by Streamlit
+    openai_api_key=OPENAI_API_KEY,
+    openai_api_base="https://api.groq.com/openai/v1"
+)
+# 🔎 Web search function using Tavily API
+def search_web_with_tavily(query):
+    if len(query) < 5:  # Ignore very short queries
+        return ""
+    print(f"🔍 Sending query to Tavily: {query}")
+    search_results = tavily_client.search(query=query, max_results=3)
+    # Extract and format the retrieved web results
+    snippets = [f"{result['title']}: {result['content']}" for result in search_results['results'] if 'content' in result]
+    print("✅ Web search results retrieved!")
+    return "\n".join(snippets) if snippets else ""
+# 📝 Prompt function for AI response generation
+def prompt_fn(query: str, context: str, web_context: str = "") -> str:
+    """
+    This is the main prompt template for the AI assistant.
+    The assistant must:
+    - Prioritize university knowledge first.
+    - Use web search only if internal knowledge is insufficient.
+    - If no relevant information is found, respond with:
+      "I’m sorry, but I don’t have information on this topic."
+    - Avoid unnecessary introductions, greetings, or explanations.
+    """
+    # Include web search results only if available
+    search_part = f"\nAdditionally, I found the following information from the web:\n{web_context}\n" if web_context else ""
+    return f"""
+    Below is the available information for answering student inquiries about Vistula University.
+    🔹 Follow this order when answering:
+    1️⃣ **Use internal university knowledge first.**
+    2️⃣ **If internal data lacks relevant details, use web search results.**
+    3️⃣ **If no useful information is found, respond with: "I’m sorry, but I don’t have information on this topic."**
+    🔹 Important Rules:
+    - **Do not start with introductions.** Provide the answer directly.
+    - **If no information is available, do not add lengthy explanations.**
+    - **Never make up or guess information.**
+    🔹 Available Information:
+    {context}
+    {search_part}
+    🔹 Question:
+    {query}
+    ---
+    ❗ **If no relevant information is found, simply say:**
+    - "I’m sorry, but I don’t have information on this topic."
+    """
+# 🔹 Define the AI pipeline (Prompt → LLM → Output Parsing)
+prompt_runnable = RunnableLambda(lambda inputs: prompt_fn(inputs["query"], inputs["context"], inputs.get("web_context", "")))
+rag_chain = prompt_runnable | llm | StrOutputParser()
+# 🔥 Response generation function
+def generate_response(retriever, query):
+    # Handle short greetings separately
+    if len(query.split()) <= 2 or query.lower() in ["hi", "hello", "help", "hey", "merhaba"]:
+        return "👋 Hi there! How can I assist you today? Please ask me a specific question about Vistula University."
+    # Retrieve relevant documents from the knowledge base
+    relevant_docs = retriever.invoke(query)
+    context = "\n".join([doc.page_content for doc in relevant_docs])
+    # If no useful data is found, return a short response
+    if not relevant_docs or len(context.strip()) < 20:
+        return "I’m sorry, but I don’t have information on this topic."
+    # Generate response using AI
+    inputs = {"query": query, "context": context}
+    response = rag_chain.invoke(inputs).strip()
+    return response if response else "I’m sorry, but I don’t have information on this topic."
+# 🔹 Logging function for tracking interactions
+def log_interaction(question, answer, source):
+    log_folder = "logs"
+    os.makedirs(log_folder, exist_ok=True)  # Ensure logs directory exists
+    log_file = os.path.join(log_folder, "chat_log.txt")
+    with open(log_file, "a", encoding="utf-8") as f:
+        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")  # Add timestamp
+        f.write(f"{timestamp} | Question: {question}\n")  # Log user question
+        f.write(f"{timestamp} | Answer: {answer}\n")  # Log AI response
+        f.write(f"{timestamp} | Source: {source}\n")  # Indicate data source (VectorStore/Web)
+        f.write("-" * 80 + "\n")  # Separator for readability

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+langchain
+langchain-community
+langchain-openai
+langchain-chroma
+chromadb
+huggingface-hub
+langchain-huggingface
+tavily-python
+sentence-transformers
+torch
+streamlit
+python-dotenv
+rapidfuzz
+pandas