# pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir  >> upgraded
#================================================================================
# Developer  : Soumen Dey
# Assignment : Gen-ai/CEP-1
# Env        : Windows with CPU [No GPU]
# License    : GPL
# - Steps:
#   1: Download the required model from LMStudio and save it to your local disk
#   2: Install the required Python libs for the code
# Note: Tech stack: llama3 and gradio (assistant: local llama and LMStudio)
#----------------------------------------------------------------------------------
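# The imports below roughly correspond to these packages (package names inferred
# from the imports used in this script; pin versions to match your environment):
#   pip install gradio llama-cpp-python langchain langchain-community \
#       langchain-text-splitters sentence-transformers faiss-cpu "unstructured[pdf]"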
import gradio as gr
import time
from llama_cpp import Llama
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document                        # not used below
from langchain_community.embeddings import OllamaEmbeddings  # not used below
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma          # not used below
# 1. Load your LLaMA 3 model
#
local_doc_path = "E:/OLLAAMA/code/pdfChat/pdf/the_nestle_hr_policy_pdf_2012.pdf"
model_path_gguf = "C:/Users/soume/.lmstudio/models/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct-Q4_K_M-GGUF/patronus-lynx-8b-instruct-q4_k_m.gguf"
LOCAL_FILE = "the_nestle_hr_policy_pdf_2012.pdf"
MODEL_NAME = "patronus-lynx-8b-instruct-q4_k_m.gguf"

llm = Llama(
    model_path=model_path_gguf,
    n_ctx=2048,
    n_threads=8,
)  # streaming is requested per call (stream=True) in chat_fn below
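# Optional sanity check (illustrative only): run one tiny non-streaming completion
# to confirm the GGUF file loaded correctly before building the rest of the app.
# print(llm("Q: What is 2 + 2? A:", max_tokens=8)["choices"][0]["text"])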
#-------------- Load the data -------------------------
# Inserting PDF
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader  # not used below

data = []
# Load the PDF
if local_doc_path:
    loader = UnstructuredPDFLoader(file_path=local_doc_path)
    data = loader.load()
    print("loaded.")
else:
    print("Please provide a path to a PDF file.")
#---------- End
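# Note: loader.load() returns a list of LangChain Document objects (page_content
# plus metadata), which is the input format the splitter below expects.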
#-------------- GET THE CHUNKS ------------------------
# Split the document into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=7500,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " ", ""],  # tried in this order
)
chunks = text_splitter.split_documents(data)
#------------------------------------------------------
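# Optional: inspect the split before embedding (purely illustrative).
# print(f"{len(chunks)} chunks; first 200 chars of chunk 0:\n{chunks[0].page_content[:200]}")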
# 2. Build the vector DB and save it to disk (see the reload note below)
localIndex = "faiss_index_v1"

# embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Load a local embedding model (swap in a larger model if your hardware allows)
embedding_model = HuggingFaceEmbeddings(
    # model_name="hkunlp/instructor-large",  # heavier alternative
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)
vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(localIndex)
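# Optional: on later runs the saved index can be reloaded instead of re-embedding
# the PDF. The allow_dangerous_deserialization flag is required by recent
# langchain_community releases for pickle-backed FAISS indexes; older releases
# do not accept it, so adjust to your installed version.
# vector_db = FAISS.load_local(localIndex, embedding_model,
#                              allow_dangerous_deserialization=True)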
# 3. Chat function with RAG + streaming
def chat_fn(message, history):
    # Get context from the vector store (top-2 most similar chunks)
    docs_with_scores = vector_db.similarity_search_with_score(message, k=2)
    context = "\n".join([doc.page_content for doc, score in docs_with_scores])

    # Truncate to 1000 characters (adjust as needed to stay within n_ctx)
    context = context[:1000]

    # Build the prompt: instruction, retrieved context, then the chat history
    prompt = "You are a helpful assistant. Use the context to answer questions.\n"
    prompt += f"Context:\n{context}\n\n"
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate with streaming (typing effect)
    response = ""
    for chunk in llm(prompt, max_tokens=512, stop=["User:"], stream=True):
        token = chunk["choices"][0]["text"]
        response += token
        yield response
        time.sleep(0.02)
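# Optional console smoke test (no UI). chat_fn is a generator, so drain it;
# the question below is only an example.
# for partial in chat_fn("What does the policy say about employee training?", []):
#     pass
# print(partial)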
# 4. Launch the Gradio chat UI
# gr.ChatInterface(
#     fn=chat_fn,
#     title="LLaMA 3 + Local Vector DB Chat",
#     description="powered by llama3/hf: (Rimbik) 🤖, \nsearch anything for the pdf 'PROFESSIONAL CERTIFICATE COURSE IN GENERATIVE AI AND MACHINE LEARNING'",
#     theme="soft",
# ).launch(share=True)
keys = [
    ("File in process", "category1"),
    ("Model Name", "category1"),
]
colors = {
    "category1": "red",
    "category2": "orange",
    "category3": "yellow",
    "category4": "green",
    "category5": "blue",
    "category6": "indigo",
}
desc = f"File in process:{LOCAL_FILE}, Model Name :{MODEL_NAME}, powered by llama-3/hf: (Rimbik) π€" | |
with gr.Blocks() as demo: | |
# highlighted_text = gr.HighlightedText(value=header, labels=keys, colors=colors) | |
gr.ChatInterface( | |
fn=chat_fn, | |
title="LLaMA 3 πͺ + Local Vector DB Chat: π€", | |
description = desc | |
,theme="soft", | |
) | |
if __name__ == "__main__": | |
demo.launch(share=False) # Set True for live public url | |
#------------------------- EOF --------------------------------------------------- Date: May-4/2025