import os

import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

model_url = 'https://huggingface.co/bartowski/OneLLM-Doey-ChatQA-V1-Llama-3.2-1B-GGUF/resolve/main/OneLLM-Doey-ChatQA-V1-Llama-3.2-1B-Q4_K_M.gguf'

llm = LlamaCPP(
    # Pass a URL to a GGUF model and it is downloaded automatically
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=256,
    context_window=2048,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__(); set n_gpu_layers to at least 1 to use the GPU
    model_kwargs={"n_gpu_layers": 1},
    # transform inputs into the Llama 2 prompt format (these llama_utils
    # helpers target the Llama 2 chat template, not the Llama 3 one)
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
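
# Optional sanity check (a suggestion, not part of the original app): uncomment
# to confirm the GGUF loads and generates before building the index. LlamaCPP
# exposes the standard LlamaIndex completion API.
#   print(llm.complete("What is Python?"))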

# Initialize the embedding model used to index the documents
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
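
# Note: LlamaIndex can wrap a LangChain embedding as above, but it also ships a
# native HuggingFace integration that drops the LangChain dependency. An
# equivalent, assuming the llama-index-embeddings-huggingface package is
# installed, would be:
#   from llama_index.embeddings.huggingface import HuggingFaceEmbedding
#   embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")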


def initialize_index():
    """Initialize the vector store index from PDF files in the data directory."""
    # Load documents from the data directory
    loader = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    )
    documents = loader.load_data()
    # Build an in-memory vector index over the document chunks
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )
    # Return a query engine that answers with the local Llama model
    return index.as_query_engine(llm=llm)
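

# Sketch (not wired into the app by default): persisting the index avoids
# re-embedding the PDFs on every restart. This variant assumes a writable
# local "storage" directory and uses the llama_index.core persistence API.
def initialize_index_persistent(persist_dir: str = "storage"):
    from llama_index.core import StorageContext, load_index_from_storage

    if os.path.isdir(persist_dir):
        # Reload the previously persisted index from disk
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        index = load_index_from_storage(storage_context, embed_model=embeddings)
    else:
        # First run: embed the documents, then persist for next time
        documents = SimpleDirectoryReader(
            input_dir="data", required_exts=[".pdf"]
        ).load_data()
        index = VectorStoreIndex.from_documents(documents, embed_model=embeddings)
        index.storage_context.persist(persist_dir=persist_dir)
    return index.as_query_engine(llm=llm)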


# Initialize the query engine at startup
query_engine = initialize_index()


def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Process a query using the RAG system."""
    try:
        # Get a response from the query engine
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {e}"


# Create the Gradio chat interface
demo = gr.ChatInterface(
    process_query,
    title="Question Answering with PDF using RAG",
    description="Ask questions about Python programming. Answers are retrieved from the indexed book 'A Byte of Python'.",
)

if __name__ == "__main__":
    demo.launch(debug=True)