# blogspace/app.py
import torch
import chromadb
import gradio as gr
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.llms import HuggingFaceLLM
from llama_index.embeddings import LangchainEmbedding
from llama_index.vector_stores import ChromaVectorStore
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# Configure the Llama 2 chat model
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
)
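# Environment assumptions: load_in_8bit needs the bitsandbytes package (and a
# CUDA GPU), and the meta-llama checkpoints are gated, so the runtime needs a
# Hugging Face token that has been granted access to the model repo.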
# Set up the embedding model via the LangChain wrapper
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
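# Optional sanity check: all-mpnet-base-v2 returns 768-dimensional vectors.
# assert len(embed_model.get_text_embedding("test")) == 768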
# Build the service context, vector store, and index once at startup;
# rebuilding them inside the query function would re-embed every document
# on each request.
service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model,
)

# Initialize ChromaDB. No collection-level embedding_function is needed:
# llama_index computes vectors with the embed_model configured above.
client = chromadb.Client()
collection = client.get_or_create_collection("documents")
vector_store = ChromaVectorStore(chroma_collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Load documents (point this at your own data directory)
documents = SimpleDirectoryReader("/path/to/your/data").load_data()

# GPTSimpleIndex is not available in current llama_index releases;
# VectorStoreIndex is the supported way to index into a vector store.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
)
query_engine = index.as_query_engine()

# Create the query function
def query_chatbot(query):
    # query() returns a Response object; Gradio's text output expects a string
    response = query_engine.query(query)
    return str(response)
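# Quick smoke test (optional, assumes /path/to/your/data points at real files):
# print(query_chatbot("What topics does this blog cover?"))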
# Create the Gradio interface
iface = gr.Interface(
    fn=query_chatbot,
    inputs="text",
    outputs="text",
    title="Blog Assistant Chatbot",
)

iface.launch()
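# Note: the default launch() works on Hugging Face Spaces; for local testing,
# iface.launch(share=True) would expose a temporary public URL instead.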