#!/usr/bin/env python
# coding: utf-8

# # DeepSeek-R1 locally with Ollama and a RAG application
#
# Install, set up, and run DeepSeek-R1 locally with Ollama, and build a simple RAG application.

import subprocess

# ## Step 1: Install dependencies

subprocess.run(["pip", "install", "ollama", "langchain", "chromadb", "gradio", "-U", "langchain-community", "pymupdf", "nbconvert", "jupyter"])

# ## Step 2: Test the response with DeepSeek-R1

import ollama

response = ollama.chat(
    model="deepseek-r1",
    messages=[
        {"role": "user", "content": "Explain Newton's second law of motion"},
    ],
)
print(response["message"]["content"])

# ## Step 3: RAG pipeline with DeepSeek-R1

import re

import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings


# Function to process PDFs: load, chunk, embed, and index the document
def process_pdf(pdf_file):
    if pdf_file is None:
        return None, None, None

    loader = PyMuPDFLoader(pdf_file.name)
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.split_documents(data)

    embeddings = OllamaEmbeddings(model="deepseek-r1")
    vectorstore = Chroma.from_documents(
        documents=chunks, embedding=embeddings, persist_directory="./chroma_db"
    )
    retriever = vectorstore.as_retriever()

    return text_splitter, vectorstore, retriever


# Function to process raw text input the same way
def process_text(text_data):
    if not text_data:
        return None, None, None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.create_documents([text_data])

    embeddings = OllamaEmbeddings(model="deepseek-r1")
    vectorstore = Chroma.from_documents(
        documents=chunks, embedding=embeddings, persist_directory="./chroma_db"
    )
    retriever = vectorstore.as_retriever()

    return text_splitter, vectorstore, retriever


# Combine retrieved document chunks into a single context string
def combine_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# LLM function: answer the question using the retrieved context
def ollama_llm(question, context):
    formatted_prompt = f"Question: {question}\n\nContext: {context}"

    response = ollama.chat(
        model="deepseek-r1",
        messages=[{"role": "user", "content": formatted_prompt}],
    )
    response_content = response["message"]["content"]

    # Remove <think>...</think> reasoning tags from DeepSeek-R1's output
    cleaned_response = re.sub(r"<think>.*?</think>", "", response_content, flags=re.DOTALL).strip()
    return cleaned_response  # Return the cleaned response


# RAG chain: retrieve relevant chunks, then answer with the LLM
def rag_chain(question, retriever):
    retrieved_docs = retriever.invoke(question)
    formatted_content = combine_docs(retrieved_docs)
    return ollama_llm(question, formatted_content)


# Gradio handler: process the uploaded PDF and run the RAG chain
def ask_question(pdf_file, question):
    if pdf_file is None:
        return "Please upload a document."

    text_splitter, vectorstore, retriever = process_pdf(pdf_file)
    if retriever is None:
        return "Failed to process document."

    result = rag_chain(question, retriever)
    return result


# Gradio interface
interface = gr.Interface(
    fn=ask_question,
    inputs=[
        gr.File(label="Upload document (PDF)"),
        gr.Textbox(label="Ask a question"),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="DeepSeek-R1 RAG App",
    description="Upload a document and ask questions about its content.",
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch(share=True)
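

# The steps above assume the "deepseek-r1" model is already present in the local
# Ollama instance (e.g. after running `ollama pull deepseek-r1` from the shell).
# Below is a minimal sketch of doing the same check programmatically; the
# `ensure_model` helper is not part of the original app, just an illustration
# using the ollama client's `show` and `pull` calls.
def ensure_model(model_name="deepseek-r1"):
    """Pull the model into the local Ollama store if it is not available yet."""
    try:
        ollama.show(model_name)  # raises ollama.ResponseError if the model is missing
    except ollama.ResponseError:
        ollama.pull(model_name)

# Example usage (call before ollama.chat / OllamaEmbeddings):
# ensure_model("deepseek-r1")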