#!/usr/bin/env python
# coding: utf-8

# # DeepSeek-R1 locally with Ollama and a RAG application
#
# Install, set up, and run DeepSeek-R1 locally with Ollama, and build a simple RAG application.

import subprocess

# ## Step 1: Install dependencies

subprocess.run(["pip", "install", "ollama", "langchain", "chromadb", "gradio", "-U", "langchain-community", "pymupdf", "nbconvert", "jupyter"])

# ## Step 2: Test the response with DeepSeek-R1

import ollama

response = ollama.chat(
    model="deepseek-r1",
    messages=[
        {"role": "user", "content": "Explain Newton's second law of motion"},
    ],
)
print(response["message"]["content"])

# ## Step 3: RAG pipeline with DeepSeek-R1

import re

import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings


# Function to process PDFs: load, chunk, embed, and index the document
def process_pdf(pdf_file):
    if pdf_file is None:
        return None, None, None

    loader = PyMuPDFLoader(pdf_file.name)
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.split_documents(data)

    embeddings = OllamaEmbeddings(model="deepseek-r1")
    vectorstore = Chroma.from_documents(
        documents=chunks, embedding=embeddings, persist_directory="./chroma_db"
    )
    retriever = vectorstore.as_retriever()

    return text_splitter, vectorstore, retriever


# Function to process raw text input the same way
def process_text(text_data):
    if not text_data:
        return None, None, None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.create_documents([text_data])

    embeddings = OllamaEmbeddings(model="deepseek-r1")
    vectorstore = Chroma.from_documents(
        documents=chunks, embedding=embeddings, persist_directory="./chroma_db"
    )
    retriever = vectorstore.as_retriever()

    return text_splitter, vectorstore, retriever


# Combine retrieved document chunks into a single context string
def combine_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# LLM function: answer the question using the retrieved context
def ollama_llm(question, context):
    formatted_prompt = f"Question: {question}\n\nContext: {context}"

    response = ollama.chat(
        model="deepseek-r1",
        messages=[{"role": "user", "content": formatted_prompt}],
    )
    response_content = response["message"]["content"]

    # Remove <think>...</think> reasoning tags from DeepSeek-R1's output
    cleaned_response = re.sub(r"<think>.*?</think>", "", response_content, flags=re.DOTALL).strip()
    return cleaned_response  # Return the cleaned response


# RAG chain: retrieve relevant chunks, then answer with the LLM
def rag_chain(question, retriever):
    retrieved_docs = retriever.invoke(question)
    formatted_content = combine_docs(retrieved_docs)
    return ollama_llm(question, formatted_content)


# Gradio handler: process the uploaded PDF and run the RAG chain
def ask_question(pdf_file, question):
    if pdf_file is None:
        return "Please upload a document."

    text_splitter, vectorstore, retriever = process_pdf(pdf_file)
    if retriever is None:
        return "Failed to process document."

    result = rag_chain(question, retriever)
    return result


# Gradio interface
interface = gr.Interface(
    fn=ask_question,
    inputs=[
        gr.File(label="Upload document (PDF)"),
        gr.Textbox(label="Ask a question"),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="DeepSeek-R1 RAG App",
    description="Upload a document and ask questions about its content.",
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch(share=True)
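

# The steps above assume the "deepseek-r1" model is already present in the local
# Ollama instance (e.g. after running `ollama pull deepseek-r1` from the shell).
# Below is a minimal sketch of doing the same check programmatically; the
# `ensure_model` helper is not part of the original app, just an illustration
# using the ollama client's `show` and `pull` calls.
def ensure_model(model_name="deepseek-r1"):
    """Pull the model into the local Ollama store if it is not available yet."""
    try:
        ollama.show(model_name)  # raises ollama.ResponseError if the model is missing
    except ollama.ResponseError:
        ollama.pull(model_name)

# Example usage (call before ollama.chat / OllamaEmbeddings):
# ensure_model("deepseek-r1")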