import os
import textwrap

import streamlit as st
import torch
from auto_gptq import AutoGPTQForCausalLM
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from transformers import AutoTokenizer, TextStreamer, pipeline
# Quick sanity checks that the Streamlit app is running and can see its files.
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)

current_working_directory = os.getcwd()
print(current_working_directory)
st.write('current dir:', current_working_directory)
arr = os.listdir('.')
st.write('dir contents:', arr)
def print_response(response: str):
    """Wrap a long response to 100 characters per line for console output."""
    print("\n".join(textwrap.wrap(response, width=100)))
# Load the PDF and split it into overlapping chunks for retrieval.
pdf_loader = UnstructuredPDFLoader("./pdfs/Predicting issue types on GitHub.pdf")
pdf_pages = pdf_loader.load_and_split()
st.write('total pages from PDFs:', len(pdf_pages))

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=512)
texts = text_splitter.split_documents(pdf_pages)
st.write('total chunks from pages:', len(texts))
st.write('loading chunks into vector db')
# Embed the chunks with Instructor embeddings and index them in Chroma.
model_name = "hkunlp/instructor-large"
hf_embeddings = HuggingFaceInstructEmbeddings(model_name=model_name)
db = Chroma.from_documents(texts, hf_embeddings)  # needed later by the chain's retriever
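# Optional sketch (not part of the original app): Streamlit reruns the whole
# script on every interaction, so re-embedding the PDF each time is slow.
# Wrapping the index build in st.cache_resource (recent Streamlit versions)
# keeps it in memory across reruns; the persist_directory value below is an
# assumption, adjust or drop it as needed. The same pattern can be applied to
# the tokenizer and model loading further down.
@st.cache_resource
def build_vector_db(pdf_path: str, persist_directory: str = "./chroma_db"):
    loader = UnstructuredPDFLoader(pdf_path)
    pages = loader.load_and_split()
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=512)
    chunks = splitter.split_documents(pages)
    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
    return Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
# Usage: db = build_vector_db("./pdfs/Predicting issue types on GitHub.pdf")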
st.write('loading tokenizer')
# AutoGPTQ loads the GPTQ-quantized weights, not the GGUF ones.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model_basename = "model"
use_triton = False

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
st.write('loading LLM')
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    model_basename=model_basename,
    use_safetensors=True,
    trust_remote_code=True,
    device=DEVICE,
    use_triton=use_triton,
    quantize_config=None,
)
st.write('setting up the chain')
# Stream generated tokens to stdout as they are produced, skipping the echoed prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# A max_new_tokens argument can also be passed here to cap answer length if needed.
text_pipeline = pipeline(task='text-generation', model=model, tokenizer=tokenizer, streamer=streamer)
llm = HuggingFacePipeline(pipeline=text_pipeline)
def generate_prompt(prompt, sys_prompt):
    """Wrap a user prompt and system prompt in the Llama-2 chat template."""
    return f"[INST] <<SYS>> {sys_prompt} <</SYS>> {prompt} [/INST]"
sys_prompt = "Use following piece of context to answer the question in less than 20 words" | |
template = generate_prompt( | |
""" | |
{context} | |
Question : {question} | |
""" | |
, sys_prompt) | |
prompt = PromptTemplate(template=template, input_variables=["context", "question"]) | |
chain_type_kwargs = {"prompt": prompt} | |
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)
st.write('READY!!!')

q1 = "What did the author work on?"
q2 = "Where did the author study?"
q3 = "What did the author do?"

result = qa_chain(q1)
st.write('question:', q1, 'result:', result)
result = qa_chain(q2)
st.write('question:', q2, 'result:', result)
result = qa_chain(q3)
st.write('question:', q3, 'result:', result)
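# Optional sketch (not in the original script): let the reader ask their own
# question through a Streamlit text box instead of only the hardcoded ones above.
# The widget label is an assumption; any string works.
user_question = st.text_input('Ask a question about the loaded PDF:')
if user_question:
    user_result = qa_chain(user_question)
    st.write('question:', user_question, 'result:', user_result)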