# pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir >> upgraded
#================================================================================
# Developer: Soumen Dey
# Assignment : Gen-ai/CEP-1
# Env : Windows with CPU [No GPU]
# License : GPL
# - Steps:
# 1: Download the required model from LMStudio and save it to your local Disk
# 2: Install required python libs for the code
# Note: Tech stack: llama3 and gradio (assistant: local llama and LMStudio)
#----------------------------------------------------------------------------------
import gradio as gr
import time
from llama_cpp import Llama
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# 1. Load your LLaMA 3 model
#
local_doc_path = "E:/OLLAAMA/code/pdfChat/pdf/the_nestle_hr_policy_pdf_2012.pdf"
model_path_gguf = "C:/Users/soume/.lmstudio/models/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct-Q4_K_M-GGUF/patronus-lynx-8b-instruct-q4_k_m.gguf"
LOCAL_FILE = "the_nestle_hr_policy_pdf_2012.pdf"
MODEL_NAME = "patronus-lynx-8b-instruct-q4_k_m.gguf"
llm = Llama(
    model_path=model_path_gguf,
    n_ctx=2048,
    n_threads=8,
)  # streaming is requested per call (stream=True) in chat_fn below
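# Optional one-off sanity check that the model loads and generates before wiring up
# the UI (the prompt below is only an illustration, not part of the original flow):
#
# out = llm("Q: What is the capital of France? A:", max_tokens=16)
# print(out["choices"][0]["text"].strip())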
#-------------- Load the data -------------------------
# Inserting PDF
from langchain_community.document_loaders import UnstructuredPDFLoader

data = []
# load pdf
if local_doc_path:
    loader = UnstructuredPDFLoader(file_path=local_doc_path)
    data = loader.load()
    print("loaded.")
else:
    print("upload a pdf file")
#---------- End
#-------------- GET THE CHUNKS ------------------------
# split and chunk
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=7500,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " ", ""],  # tried in this order
)
chunks = text_splitter.split_documents(data)
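# Optional sanity check (not in the original flow): confirm how many chunks were produced.
# print(f"Created {len(chunks)} chunk(s) from {LOCAL_FILE}")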
#------------------------------------------------------
# 2. Build the vector DB (or load it from disk)
localIndex = "faiss_index_v1"
# Load a local embedding model (swap in a larger model if resources allow)
embedding_model = HuggingFaceEmbeddings(
    # model_name="hkunlp/instructor-large",  # larger, slower alternative
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)
vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(localIndex)
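# The step-2 comment above also mentions loading the index from disk; a minimal sketch
# of that path, assuming the saved index was built with the same embedding model
# (newer langchain_community versions require allow_dangerous_deserialization=True):
#
# import os
# if os.path.exists(localIndex):
#     vector_db = FAISS.load_local(
#         localIndex, embedding_model, allow_dangerous_deserialization=True
#     )
# else:
#     vector_db = FAISS.from_documents(chunks, embedding_model)
#     vector_db.save_local(localIndex)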
# 3. Chat function with RAG + streaming
def chat_fn(message, history):
    # Get context from vector store
    docs_with_scores = vector_db.similarity_search_with_score(message, k=2)
    context = "\n".join([doc.page_content for doc, score in docs_with_scores])
    # Truncate to 1000 characters (or adjust as needed)
    context = context[:1000]
    # Create prompt
    prompt = "You are a helpful assistant. Use the context to answer questions.\n"
    prompt += f"Context:\n{context}\n\n"
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"
    # Generate with streaming (typing effect)
    response = ""
    for chunk in llm(prompt, max_tokens=512, stop=["User:"], stream=True):
        token = chunk["choices"][0]["text"]
        response += token
        yield response
        time.sleep(0.02)
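# Quick console test of the streaming generator outside Gradio (the question is an
# illustrative placeholder, not taken from the original script):
#
# for partial in chat_fn("Summarise the leave policy.", []):
#     print(partial, end="\r")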
# 4. Launch the Gradio chat UI
# gr.ChatInterface(
# fn=chat_fn,
# title="LLaMA 3 + Local Vector DB Chat",
# description="powered by llama3/hf: (Rimbik) π€, \nsearch anything for the pdf 'PROFESSIONAL CERTIFICATE COURSE IN GENERATIVE AI AND MACHINE LEARNING'"
# ,theme="soft",
# ).launch(share=True)
keys = [
    ("File in process", "category1"),
    ("Model Name", "category1"),
]
colors = {
    "category1": "red",
    "category2": "orange",
    "category3": "yellow",
    "category4": "green",
    "category5": "blue",
    "category6": "indigo",
}
desc = f"File in process:{LOCAL_FILE}, Model Name :{MODEL_NAME}, powered by llama-3/hf: (Rimbik) π€"
with gr.Blocks() as demo:
    # highlighted_text = gr.HighlightedText(value=header, labels=keys, colors=colors)
    gr.ChatInterface(
        fn=chat_fn,
        title="LLaMA 3 💪 + Local Vector DB Chat: 🤖",
        description=desc,
        theme="soft",
    )

if __name__ == "__main__":
    demo.launch(share=False)  # set share=True for a live public URL
#------------------------- EOF ---------------------------------------------------Date :May-4/2025