# TestRAGonPDFs / app.py
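# A small Streamlit experiment: retrieval-augmented QA over a local PDF with
# LangChain, Chroma, Instructor embeddings, and a GPTQ-quantized Llama-2 chat model.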
import streamlit as st
import os
import textwrap
import torch
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from auto_gptq import AutoGPTQForCausalLM  # needed for the quantized Llama-2 checkpoint below
from transformers import AutoTokenizer, TextStreamer, pipeline
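
# Quick Streamlit sanity check (slider) and working-directory diagnostics.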
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)
current_working_directory = os.getcwd()
print(current_working_directory)
st.write('current dir:', current_working_directory)
arr = os.listdir('.')
st.write('dir contents:', arr)
def print_response(response: str):
    print("\n".join(textwrap.wrap(response, width=100)))
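
# Load the PDF page by page, then split pages into overlapping chunks for retrieval.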
pdf_loader = UnstructuredPDFLoader("./pdfs/Predicting issue types on GitHub.pdf")
pdf_pages = pdf_loader.load_and_split()
st.write('total pages from PDFs:', len(pdf_pages))
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=512)
texts = text_splitter.split_documents(pdf_pages)
st.write('total chunks from pages:', len(texts))
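
# Embed the chunks with an Instructor model and index them in an in-memory Chroma store.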
st.write('loading chunks into vector db')
model_name = "hkunlp/instructor-large"
hf_embeddings = HuggingFaceInstructEmbeddings(model_name=model_name)
db = Chroma.from_documents(texts, hf_embeddings)
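
# Load the tokenizer and the GPTQ-quantized Llama-2-13B-chat model.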
st.write('loading tokenizer')
# Use the GPTQ checkpoint: AutoGPTQForCausalLM.from_quantized() below expects GPTQ
# safetensors, while the GGUF variant of this model targets llama.cpp-style runtimes.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model_basename = "model"
use_triton = False
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
st.write('loading LLM')
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    model_basename=model_basename,
    use_safetensors=True,
    trust_remote_code=True,
    device=DEVICE,
    use_triton=use_triton,
    quantize_config=None,
)
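
# Wrap the quantized model in a transformers text-generation pipeline and expose it to LangChain.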
st.write('setting up the chain')
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
text_pipeline = pipeline(task='text-generation', model=model, tokenizer=tokenizer, streamer=streamer)
llm = HuggingFacePipeline(pipeline=text_pipeline)
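
# Llama-2-chat expects prompts in the [INST] ... [/INST] format, with an optional <<SYS>> system block.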
def generate_prompt(prompt, sys_prompt):
    return f"[INST] <<SYS>> {sys_prompt} <</SYS>> {prompt} [/INST]"

sys_prompt = "Use the following piece of context to answer the question in less than 20 words"
template = generate_prompt(
    """
{context}
Question: {question}
""",
    sys_prompt,
)
prompt = PromptTemplate(template=template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": prompt}
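
# "stuff" chain: the top-2 retrieved chunks are inserted directly into the prompt's {context} slot.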
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)
st.write('READY!!!')
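
# Run a few sample questions through the chain; each result also carries its source documents.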
q1 = "what did the author work on?"
q2 = "where did the author study?"
q3 = "what did the author do?"
result = qa_chain(q1)
st.write('question:', q1, 'result:', result)
result = qa_chain(q2)
st.write('question:', q2, 'result:', result)
result = qa_chain(q3)
st.write('question:', q3, 'result:', result)