| | import os |
| | import torch |
| | from transformers import pipeline |
| | from langchain_huggingface import HuggingFaceEmbeddings |
| | from langchain_chroma import Chroma |
| | from config import EMBEDDING_MODEL, LLM_MODEL, CHROMA_DIR, LLM_TASK |
| |
|
| | |
# Sentence-embedding model shared by indexing and query-time retrieval.
# Must match the model used when the Chroma store was built, or similarity
# scores are meaningless.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
| |
|
| | |
# Load the persisted Chroma store only when CHROMA_DIR is an existing,
# non-empty directory; otherwise mark the knowledge base unavailable so
# ask_rag_with_status() can refuse queries instead of crashing.
# (os.path.isdir rather than os.path.exists: a stray file with the same
# name must not be mistaken for a store; os.listdir rather than
# any(os.scandir(...)), which leaks an unclosed scandir iterator.)
if os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR):
    vectordb = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
    print("✅ Vector DB loaded successfully")
else:
    vectordb = None
    print("⚠️ Vector DB folder missing or empty")
| |
|
| | |
# Local text-generation pipeline used to answer questions over the
# retrieved context.  CPU-only, full float32 for maximum compatibility.
# NOTE(review): device_map= requires the `accelerate` package; if it is
# not installed, switch to device="cpu".
qa_pipeline = pipeline(
    LLM_TASK,
    model=LLM_MODEL,
    device_map="cpu",
    max_new_tokens=256,
    trust_remote_code=True,
    # torch_dtype is a first-class pipeline argument; passing it via
    # model_kwargs is a deprecated path in recent transformers releases.
    torch_dtype=torch.float32,
)
| |
|
def ask_rag_with_status(question: str):
    """Answer *question* with retrieval-augmented generation.

    Retrieves the top-3 most similar chunks from the module-level Chroma
    store, builds a plain "Context / Question / Answer" prompt, and runs
    the local text-generation pipeline greedily.

    Args:
        question: The user's natural-language question.

    Returns:
        A ``(answer, status)`` tuple.  On success ``status`` is a list of
        progress strings; on failure it is the string ``"ERROR"`` or
        ``"TIMEOUT"``.  (The mixed str/list status type is preserved for
        backward compatibility with existing callers.)
    """
    if vectordb is None:
        return "Knowledge base not ready.", "ERROR"

    # Retrieve supporting context and assemble the prompt outside the
    # try block — only generation itself should be guarded.
    docs = vectordb.similarity_search(question, k=3)
    context = "\n".join(d.page_content for d in docs)
    prompt = f"Context:\n{context}\n\nQuestion: {question}\n\nAnswer:"

    try:
        # Greedy decoding: do_sample=False alone selects it; passing
        # temperature=0.0 as well is contradictory and triggers a
        # transformers warning, so it is omitted.
        result = qa_pipeline(
            prompt,
            do_sample=False,
            pad_token_id=qa_pipeline.tokenizer.eos_token_id,
        )
    except Exception as e:
        print(f"❌ Generation error: {e}")
        # NOTE(review): status kept as "TIMEOUT" for caller compatibility,
        # but the message no longer falsely claims a timeout for every
        # kind of generation failure.
        return "The model failed to generate an answer. Try a shorter question.", "TIMEOUT"

    # text-generation pipelines echo the prompt; keep only the text after
    # the final "Answer:" marker.
    full_output = result[0]["generated_text"]
    if "Answer:" in full_output:
        answer = full_output.split("Answer:")[-1].strip()
    else:
        answer = full_output.strip()

    if not answer:
        answer = "I found context in the documents but could not generate a coherent summary. Please rephrase."

    return answer, ["Context retrieved", "Qwen generated answer"]