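"""Minimal RAG pipeline over a local text file.

Loads data.txt, splits it into chunks, embeds the chunks with a remote Ollama
embedding model, indexes them in an in-memory Chroma store, and answers a
question with an OpenAI-compatible chat model through a LangChain retrieval
chain.
"""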
import os

from dotenv import load_dotenv
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load environment variables (LLM_API_KEY, OLLAMA_SERVER, LANGSMITH_API_KEY, ...)
load_dotenv()

# β€”β€”β€” CONFIGURATION β€”β€”β€”
DATA_PATH     = "data.txt"
OLLAMA_URL    = os.getenv(
    "OLLAMA_SERVER",
    "https://chandimaprabath-ollama-server.hf.space"
)
EMBED_MODEL   = "nomic-embed-text:latest"
LLM_API_KEY   = os.getenv("LLM_API_KEY")
LLM_API_BASE  = "https://llm.chutes.ai/v1"
LLM_MODEL     = "chutesai/Llama-4-Scout-17B-16E-Instruct"
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")  # optional; LangSmith tracing picks this up from the environment
CHUNK_SIZE    = 1000
CHUNK_OVERLAP = 0
TOP_K         = 5
# β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”


def main():
    # 1) Load & split
    docs = TextLoader(DATA_PATH).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP
    )

    chunks = splitter.split_documents(docs)
    print(f"β†’ Split into {len(chunks)} chunks")

    # 2) Embed & index remotely
    embedder = OllamaEmbeddings(
        base_url=OLLAMA_URL, model=EMBED_MODEL
    )
    vector_store = Chroma.from_documents(chunks, embedder)
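    # NOTE: with no persist_directory, this Chroma index lives in memory and is
    # rebuilt from scratch on every run.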

    # 3) Configure remote-only LLM
    llm = ChatOpenAI(
        api_key=LLM_API_KEY,
        base_url=LLM_API_BASE,
        model=LLM_MODEL
    )
    # 4) Build RAG chain with LCEL-style helpers:
    #    retriever -> stuff retrieved docs into the hub prompt -> LLM
    retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
    retriever = vector_store.as_retriever(search_kwargs={"k": TOP_K})
    combine_chain = create_stuff_documents_chain(llm=llm, prompt=retrieval_qa_chat_prompt)
    retrieval_chain = create_retrieval_chain(retriever, combine_chain)
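    # The chain expects {"input": ...} and returns a dict with "input",
    # "context" (the retrieved documents), and "answer".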


    # 5) Run query
    query = "Who jumped over the lazy dog?"
    print("πŸ”Ž Query:", query)
    result = retrieval_chain.invoke({"input": query})
    print("\nπŸ“ Answer:\n", result)


if __name__ == "__main__":
    main()