helloworld53 committed on
Commit
15fbc32
1 Parent(s): 7e48800

making rag file

Files changed (1)
app.py +89 -2
app.py CHANGED
@@ -1,4 +1,91 @@
  import streamlit as st
-
- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
+ @st.cache_resource
+ def load_resources():
+     import torch
+     from auto_gptq import AutoGPTQForCausalLM
+     from langchain import HuggingFacePipeline, PromptTemplate
+     from langchain.chains import RetrievalQA
+     from langchain.document_loaders import PyPDFDirectoryLoader
+     from langchain.embeddings import HuggingFaceBgeEmbeddings
+     from langchain.text_splitter import RecursiveCharacterTextSplitter
+     from langchain.vectorstores import Chroma
+     from pdf2image import convert_from_path
+     from transformers import AutoTokenizer, TextStreamer, pipeline
+
+     DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
+
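+     # Load every PDF from the local "pdfs" folder, split it into 1024-character
+     # chunks, embed the chunks with BGE, and index them in a persistent Chroma store.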
+     loader = PyPDFDirectoryLoader("pdfs")
+     docs = loader.load()
+     embeddings = HuggingFaceBgeEmbeddings(
+         model_name="BAAI/bge-base-en", model_kwargs={"device": DEVICE}
+     )
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
+     texts = text_splitter.split_documents(docs)
+     db = Chroma.from_documents(texts, embeddings, persist_directory="db")
+
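+     # Download the GPTQ-quantized Llama-2 13B chat model and its tokenizer from the Hub.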
+     model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
+     # model_basename = "gptq_model-4bit-128g"
+     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+     model = AutoGPTQForCausalLM.from_quantized(
+         model_name_or_path,
+         revision="main",
+         # model_basename=model_basename,
+         use_safetensors=True,
+         trust_remote_code=True,
+         inject_fused_attention=False,
+         device=DEVICE,
+         quantize_config=None,
+     )
+
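+     # Wrap the model in a streaming text-generation pipeline and expose it to
+     # LangChain as an LLM.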
+     streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+     text_pipeline = pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=1024,
+         temperature=0,
+         top_p=0.95,
+         repetition_penalty=1.15,
+         streamer=streamer,
+     )
+     llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})
+
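+     # Build the Llama-2 chat prompt; {context} and {question} are filled in by
+     # the RetrievalQA chain at query time.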
+     SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
+
+     def generate_prompt(prompt: str, system_prompt: str = SYSTEM_PROMPT) -> str:
+         return f"""
+ [INST] <<SYS>>
+ {system_prompt}
+ <</SYS>>
+
+ {prompt} [/INST]
+ """.strip()
+
+     template = generate_prompt(
+         """
+ {context}
+
+ Question: {question}
+ """,
+         system_prompt=SYSTEM_PROMPT,
+     )
+     prompt = PromptTemplate(template=template, input_variables=["context", "question"])
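+
+     # Assemble the RetrievalQA chain: retrieve the 2 most similar chunks and
+     # "stuff" them into the prompt as context.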
+     qa_chain = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=db.as_retriever(search_kwargs={"k": 2}),
+         return_source_documents=True,
+         chain_type_kwargs={"prompt": prompt},
+         verbose=True,
+     )
+     return qa_chain
+
+
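+ # Streamlit UI: build the chain once (cached) and answer questions about the PDFs.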
+ st.title("Please ask your question about Lithuanian rules for foreigners.")
+ qa_chain = load_resources()
+ question = st.text_input("Enter your question:")
+
+ if question:
+     # Perform question answering; the retriever supplies the {context} for the
+     # prompt from the indexed PDFs, so only the question is passed to the chain.
+     answer = qa_chain(question)
+
+     # Display the answer
+     st.header("Answer:")
+     st.write(answer["result"])