# Cloudflare-demo / app.py: a Streamlit RAG demo built on llama_index and LlamaCPP
import logging
import sys

# Log to stdout so messages show up in the container / Space logs.
# basicConfig already installs a stdout handler; adding a second
# StreamHandler on top of it would duplicate every log line.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

# Load every readable file in the current directory as the RAG corpus.
documents = SimpleDirectoryReader("./").load_data()
llm = LlamaCPP(
    # Pass the URL of a GGUF model to download it automatically
    model_url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    # Optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Keep the context window below the model's maximum to leave room
    # for the generated tokens
    context_window=3900,
    # kwargs passed through to the model's __call__()
    generate_kwargs={},
    # kwargs passed through to the model's __init__();
    # n_gpu_layers=-1 offloads all layers to the GPU (use 0 for CPU-only)
    model_kwargs={"n_gpu_layers": -1},
    # Transform inputs into the instruct-style prompt format the model expects
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
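
# Optional smoke test of the raw LLM (a sketch: it assumes the GGUF download
# above succeeded); uncomment to verify generation works before building
# the index. LlamaCPP exposes the standard llama_index complete() call.
# print(llm.complete("What does RAG stand for?"))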
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding

# Wrap a LangChain HuggingFace embedding model for use with llama_index.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="thenlper/gte-large")
)
# Build the index; chunk_size controls how documents are split for retrieval.
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
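
# Note: Streamlit re-runs this whole script on every interaction, so a real
# deployment would likely cache the heavy objects above, e.g. by building
# them inside a function decorated with @st.cache_resource (a suggestion,
# not part of the original app):
#
# @st.cache_resource
# def build_index():
#     ...  # construct llm, embed_model, service_context, and index here
#     return index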
# Streamlit UI. Run this file with: streamlit run app.py
import streamlit as st
from transformers import GPT2LMHeadModel, GPT2Tokenizer
def generate_response(prompt):
    """Standalone GPT-2 baseline. Not used by the UI below, which
    answers from the vector index instead."""
    model_name = "gpt2"
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    output = model.generate(
        input_ids, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text
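
# Quick check of the GPT-2 baseline (downloads the small gpt2 checkpoint
# on first use); uncomment to try it outside the Streamlit UI:
# print(generate_response("Retrieval-augmented generation is"))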
def main():
    st.title("Cloudflare RAG")

    # User input
    user_input = st.text_input("Enter your message:")

    if user_input:
        # Answer from the vector index rather than the GPT-2 baseline
        query_engine = index.as_query_engine()
        response = query_engine.query(user_input)

        # query() returns a Response object, so convert it to a string for display
        st.text_area("Response:", str(response), height=100)

if __name__ == "__main__":
    main()