| import warnings |
| warnings.filterwarnings(action='ignore') |
| import torch |
| from langchain_community.vectorstores import FAISS |
| from langchain_huggingface import HuggingFaceEmbeddings |
| from common.rag.document_loader import fetch_document_chunks |
| from dotenv import load_dotenv |
| load_dotenv() |
|
|
|
|
def fetch_vectorstore_retriever():
    """
    Build a fresh FAISS similarity retriever over the project's document chunks.

    On every call this function:
    - Picks the compute device: ``'cuda'`` when ``torch.cuda.is_available()``, else ``'cpu'``
    - Instantiates ``sentence-transformers/all-MiniLM-L6-v2`` embeddings with
      ``normalize_embeddings=True``
    - Indexes the chunks returned by ``fetch_document_chunks()`` via ``FAISS.from_documents``
    - Wraps the index as a retriever with ``search_type="similarity"`` and ``k=10``

    Returns
    -------
    retriever
        The object produced by ``vectorstore.as_retriever(...)``, configured to
        return the 10 most similar chunks per query.

    Notes
    -----
    - Nothing is cached or persisted here: the embedding model is loaded and the
      index is rebuilt from the document chunks each time the function runs.
    - Embeddings are normalized, so nearest-neighbour ranking should match
      cosine-similarity ranking (assumes the store's default distance metric
      is in use -- TODO confirm).
    """
    # Prefer the GPU when one is visible to torch; otherwise stay on the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": True},
    )

    # Chunks are loaded after the embedding model, same order as before.
    chunks = fetch_document_chunks()
    store = FAISS.from_documents(documents=chunks, embedding=embedder)

    return store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 10},
    )
|
|
|
|
def fetch_relevant_document(topic="None"):
    """
    Retrieve document chunks relevant to a graphology topic, formatted as RAG context.

    Builds a structured handwriting-analysis query around ``topic``, runs it through
    the retriever returned by ``fetch_vectorstore_retriever()``, and joins the
    resulting chunks into a single labelled string.

    Parameters
    ----------
    topic : str, default="None"
        Personality trait, psychological characteristic, writing style aspect or any
        topic for which handwriting analysis information is requested.
        Examples: "ambition", "emotional stability", "aggressiveness", "introversion"
        The string "None" (the default), an actual ``None``, and empty or
        whitespace-only strings all mean "no topic supplied".

    Returns
    -------
    str
        The retrieved chunks concatenated in retrieval order, each prefixed with
        "[Document N]" (N starting at 1) and separated by blank lines.
        Returns an empty string when no topic is supplied or when the retriever
        yields no documents.

    Notes
    -----
    - When no topic is supplied the function returns before a retriever is
      requested, so no embedding model or index is built for a query that has
      nothing to search for.
    - A retriever is requested via ``fetch_vectorstore_retriever()`` on every call
      that does carry a topic.
    - The returned context is meant to be passed directly into a RAG prompt.
    """
    # "None" is the sentinel default; previously it was searched for as literal
    # text, contradicting the documented "empty context" contract.
    normalized_topic = "" if topic is None else str(topic).strip()
    if not normalized_topic or normalized_topic == "None":
        return ""

    retriever = fetch_vectorstore_retriever()
    query = (
        f"Handwriting sample analysis for: {topic}\n"
        "Extract and summarize: \n"
        "- Observed writing style characteristics (slant, pressure, size, speed, spacing, margins, baseline, letter forms, connections, etc.)\n"
        "- Graphological interpretations of personality traits linked to those features\n"
        "- Overall psychological or personality impression"
    )
    docs = retriever.invoke(query)

    # An empty result list joins to "", which is the documented empty context.
    return "\n\n".join(
        f"[Document {i+1}]\n{doc.page_content}\n" for i, doc in enumerate(docs)
    )