import streamlit as st
import os
import textwrap
import torch
import chromadb
import langchain
import openai
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader, UnstructuredPDFLoader, YoutubeLoader
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI, HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from auto_gptq import AutoGPTQForCausalLM  # required for AutoGPTQForCausalLM.from_quantized below
from transformers import AutoTokenizer, pipeline, logging, TextStreamer
from langchain.document_loaders.image import UnstructuredImageLoader

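# Quick Streamlit sanity check: a slider whose squared value is echoed back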
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)

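# Print and display the working directory and its contents to verify the deployed file layout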
current_working_directory = os.getcwd()
print(current_working_directory)
st.write('current dir:', current_working_directory)
arr = os.listdir('.')
st.write('dir contents:', arr)

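# Helper to wrap long responses at 100 characters for readable console output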
def print_response(response: str):
    print("\n".join(textwrap.wrap(response, width=100)))

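# Load the PDF, split it into pages, then into 2000-character chunks with 512-character overlap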
pdf_loader = UnstructuredPDFLoader("./pdfs/Predicting issue types on GitHub.pdf")
pdf_pages = pdf_loader.load_and_split()
st.write('total pages from PDFs:', len(pdf_pages))
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=512)
texts = text_splitter.split_documents(pdf_pages)
st.write('total chunks from pages:', len(texts))

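# Embed the chunks with hkunlp/instructor-large and index them in a Chroma vector store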
st.write('loading chunks into vector db')
model_name = "hkunlp/instructor-large"
hf_embeddings = HuggingFaceInstructEmbeddings(model_name=model_name)
db = Chroma.from_documents(texts, hf_embeddings)  # db is used by the retriever further down

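# Load the tokenizer matching the quantized Llama-2 13B chat checkpoint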
st.write('loading tokenizer')
# AutoGPTQForCausalLM.from_quantized loads GPTQ checkpoints, not GGUF files
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

model_basename = "model"

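# Pick the GPU if one is available, then load the GPTQ-quantized model onto it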
use_triton = False
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
st.write('loading LLM')
model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
        model_basename=model_basename,
        use_safetensors=True,
        trust_remote_code=True,
        device=DEVICE,
        use_triton=use_triton,
        quantize_config=None)

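# Wrap the model in a text-generation pipeline that streams tokens, and expose it to LangChain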
st.write('setting up the chain')
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
text_pipeline = pipeline(task='text-generation', model=model, tokenizer=tokenizer, streamer=streamer)
llm = HuggingFacePipeline(pipeline=text_pipeline)

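# Llama-2 chat prompt format: system prompt inside <<SYS>> tags, user prompt inside [INST] tags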
def generate_prompt(prompt, sys_prompt):
    return f"[INST] <<SYS>> {sys_prompt} <</SYS>> {prompt} [/INST]"

sys_prompt = "Use following piece of context to answer the question in less than 20 words"
template = generate_prompt(
    """
    {context}

    Question : {question}
    """
    , sys_prompt)

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

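# RetrievalQA with the "stuff" chain: the top 2 retrieved chunks are placed into the prompt as {context}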
chain_type_kwargs = {"prompt": prompt}
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents = True,
    chain_type_kwargs=chain_type_kwargs,
)
st.write('READY!!!')

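# Run a few sample questions through the chain and display each answer with its source documents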
q1="what the author worked on ?"
q2="where did author study?"
q3="what author did ?"
result = qa_chain(q1)
st.write('question:', q1, 'result:', result)

result = qa_chain(q2)
st.write('question:', q2, 'result:', result)

result = qa_chain(q3)
st.write('question:', q3, 'result:', result)