Sean-Case committed on
Commit d213c15
Parent: d5a8385

Improved advanced model prompt, added a stop generation button, and improved the context prompt.

Files changed (3):
  1. app.py +7 -5
  2. chatfuncs/chatfuncs.py +93 -42
  3. chatfuncs/ingest.py +1 -1
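
The commit message mentions a stop generation button, but the wiring for it is not part of the hunks shown below. A minimal sketch of the usual Gradio pattern for cancelling a streaming chat event follows; the names chat_fn, msg, chatbot and stop_button are illustrative assumptions, not the repository's actual code.

import gradio as gr

# Hypothetical sketch: cancel an in-flight streaming generation with a button.
def chat_fn(message, history):
    history = (history or []) + [(message, "")]
    for token in ["example ", "streamed ", "answer"]:  # stand-in for a model token stream
        history[-1] = (message, history[-1][1] + token)
        yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    stop_button = gr.Button("Stop generation")

    # Keep a handle on the streaming event so the button can cancel it.
    submit_event = msg.submit(chat_fn, [msg, chatbot], chatbot)
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])

demo.queue()   # queueing is required for streaming output and event cancellation
# demo.launch()
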
app.py CHANGED
@@ -90,12 +90,14 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
     print(vars(cpu_config))

     try:
-        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
-        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Sheared-LLaMA-1.3B-gguf', model_type='llama', model_file='q8_0-sheared-llama-1.3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        #model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
         #model = AutoModelForCausalLM.from_pretrained('TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF', model_type='llama', model_file='tinyllama-1.1b-1t-openorca.Q8_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
     except:
-        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
-        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Sheared-LLaMA-1.3B-gguf', model_type='llama', model_file='q8_0-sheared-llama-1.3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        #model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
+        model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
+        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
         #model = AutoModelForCausalLM.from_pretrained('TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF', model_type='llama', model_file='tinyllama-1.1b-1t-openorca.Q8_0.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())


@@ -228,7 +230,7 @@ with block:
     with gr.Tab("Advanced features"):
         model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
         with gr.Row():
-            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=6, step = 1, visible=True)
+            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=6, step = 1, visible=False)
            change_model_button = gr.Button(value="Load model", scale=0)
        load_text = gr.Text(label="Load status")

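The try/except above attempts to load the GGUF model with the GPU settings and falls back to the CPU settings if that fails. A minimal sketch of the same pattern, assuming a simple config dataclass in place of the app's gpu_config/cpu_config objects (the exact fields here are assumptions, not the app's real config classes):

from dataclasses import dataclass
from ctransformers import AutoModelForCausalLM

@dataclass
class RunConfig:  # hypothetical stand-in for the app's config objects
    temperature: float = 0.1
    context_length: int = 2048
    gpu_layers: int = 0  # 0 = CPU only

gpu_config = RunConfig(gpu_layers=5)
cpu_config = RunConfig(gpu_layers=0)

def load_gguf_model(repo='Aryanne/Orca-Mini-3B-gguf', model_file='q5_0-orca-mini-3b.gguf'):
    try:
        # Try GPU settings first; **vars(...) unpacks the dataclass into keyword arguments.
        return AutoModelForCausalLM.from_pretrained(repo, model_type='llama',
                                                    model_file=model_file, **vars(gpu_config))
    except Exception:
        # Fall back to CPU-only settings if GPU-accelerated loading is unavailable.
        return AutoModelForCausalLM.from_pretrained(repo, model_type='llama',
                                                    model_file=model_file, **vars(cpu_config))
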
chatfuncs/chatfuncs.py CHANGED
@@ -12,7 +12,7 @@ from threading import Thread
 from transformers import pipeline, TextIteratorStreamer

 # Alternative model sources
-from dataclasses import asdict, dataclass
+#from dataclasses import asdict, dataclass

 # Langchain functions
 from langchain.prompts import PromptTemplate
@@ -55,8 +55,8 @@ model = [] # Define empty list for model functions to run
 tokenizer = [] # Define empty list for model functions to run

 ## Highlight text constants
-hlt_chunk_size = 15
-hlt_strat = [" ", ".", "!", "?", ":", "\n\n", "\n", ","]
+hlt_chunk_size = 12
+hlt_strat = [" ", ". ", "! ", "? ", ": ", "\n\n", "\n", ", "]
 hlt_overlap = 4

 ## Initialise NER model ##
@@ -217,58 +217,106 @@ def base_prompt_templates(model_type = "Flan Alpaca"):
 # The main prompt:

 instruction_prompt_template_alpaca_quote = """### Instruction:
 Quote directly from the SOURCE below that best answers the QUESTION. Only quote full sentences in the correct order. If you cannot find an answer, start your response with "My best guess is: ".

 CONTENT: {summaries}
-
 QUESTION: {question}

 Response:"""

 instruction_prompt_template_alpaca = """### Instruction:
 ### User:
 Answer the QUESTION using information from the following CONTENT.
 CONTENT: {summaries}
 QUESTION: {question}

 Response:"""

-instruction_prompt_template_sheared_llama = """Answer the QUESTION using information from the following CONTENT.
-CONTENT: {summaries}
-QUESTION: {question}
-
-Answer:"""
+instruction_prompt_template_openllama = """Answer the QUESTION using information from the following CONTENT.
+QUESTION - {question}
+CONTENT - {summaries}
+Answer:"""
+
+instruction_prompt_template_platypus = """### Instruction:
+Answer the QUESTION using information from the following CONTENT.
+CONTENT: {summaries}
+QUESTION: {question}
+### Response:"""
+
+instruction_prompt_template_wizard_orca_quote = """### HUMAN:
+Quote text from the CONTENT to answer the QUESTION below.
+CONTENT - {summaries}
+QUESTION - {question}
+### RESPONSE:
+"""
+
+instruction_prompt_template_wizard_orca = """### HUMAN:
+Answer the QUESTION below based on the CONTENT. Only refer to CONTENT that directly answers the question.
+CONTENT - {summaries}
+QUESTION - {question}
+### RESPONSE:
+"""

 instruction_prompt_template_orca = """
 ### System:
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 ### User:
 Answer the QUESTION with a short response using information from the following CONTENT.
-CONTENT: {summaries}
 QUESTION: {question}
+CONTENT: {summaries}

 ### Response:"""
+
+instruction_prompt_template_orca_quote = """
+### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+### User:
+Quote text from the CONTENT to answer the QUESTION below.
+QUESTION: {question}
+CONTENT: {summaries}
+### Response:
+"""
+
+instruction_prompt_template_orca_rev = """
+### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+### User:
+Answer the QUESTION with a short response using information from the following CONTENT.
+QUESTION: {question}
+CONTENT: {summaries}
+
+### Response:"""

 instruction_prompt_mistral_orca = """<|im_start|>system\n
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 <|im_start|>user\n
 Answer the QUESTION using information from the following CONTENT. Respond with short answers that directly answer the question.
 CONTENT: {summaries}
 QUESTION: {question}\n
 Answer:<|im_end|>"""

 instruction_prompt_tinyllama_orca = """<|im_start|>system\n
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 <|im_start|>user\n
 Answer the QUESTION using information from the following CONTENT. Only quote text that directly answers the question and nothing more. If you can't find an answer to the question, respond with "Sorry, I can't find an answer to that question.".
 CONTENT: {summaries}
 QUESTION: {question}\n
 Answer:<|im_end|>"""
+
+instruction_prompt_marx = """
+### HUMAN:
+Answer the QUESTION using information from the following CONTENT.
+CONTENT: {summaries}
+QUESTION: {question}
+
+### RESPONSE:
+"""

 if model_type == "Flan Alpaca":
     INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_alpaca, input_variables=['question', 'summaries'])
 elif model_type == "Orca Mini":
-    INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_orca, input_variables=['question', 'summaries'])
+    INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_wizard_orca, input_variables=['question', 'summaries'])

 return INSTRUCTION_PROMPT, CONTENT_PROMPT

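The hunk above points the "Orca Mini" option at the new wizard_orca template. For reference, a short sketch of how such a LangChain PromptTemplate gets filled in at query time; the question and summaries values here are invented purely for illustration:

from langchain.prompts import PromptTemplate

instruction_prompt_template_wizard_orca = """### HUMAN:
Answer the QUESTION below based on the CONTENT. Only refer to CONTENT that directly answers the question.
CONTENT - {summaries}
QUESTION - {question}
### RESPONSE:
"""

INSTRUCTION_PROMPT = PromptTemplate(template=instruction_prompt_template_wizard_orca,
                                    input_variables=['question', 'summaries'])

# Example fill with made-up retrieval output:
prompt_text = INSTRUCTION_PROMPT.format(
    question="When did the centre open?",
    summaries="The centre opened to the public on 3 May 2021.")
print(prompt_text)
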
@@ -281,7 +329,7 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt, content
 new_question_kworded = adapt_q_from_chat_history(question, chat_history, extracted_memory) # new_question_keywords,


-docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 5, out_passages = 1,
+docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 10, out_passages = 2,
                                                            vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)#,
                                                            #vectorstore=globals()["vectorstore"], embeddings=globals()["embeddings"])

@@ -382,6 +430,8 @@ def produce_streaming_answer_chatbot(history, full_prompt, model_type):

 gen_config = CtransGenGenerationConfig()

+print(vars(gen_config))
+
 # Pull the generated text from the streamer, and update the model output.
 start = time.time()
 NUM_TOKENS=0
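
The added print(vars(gen_config)) simply logs the active generation settings before streaming starts. As a rough sketch, a ctransformers model can stream text with such settings passed as keyword arguments; the dataclass fields below are assumptions and do not reproduce the repository's actual CtransGenGenerationConfig:

from dataclasses import dataclass

@dataclass
class GenConfig:  # hypothetical stand-in for CtransGenGenerationConfig
    temperature: float = 0.1
    top_k: int = 40
    top_p: float = 0.95
    repetition_penalty: float = 1.1
    max_new_tokens: int = 256

gen_config = GenConfig()
print(vars(gen_config))  # e.g. {'temperature': 0.1, 'top_k': 40, ...}

# Assuming `model` is a ctransformers model loaded as in app.py:
# for new_text in model(full_prompt, stream=True, **vars(gen_config)):
#     print(new_text, end="")
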
@@ -633,7 +683,8 @@ def get_expanded_passages(vectorstore, docs, width):
 return ''.join(content), meta[0], meta[-1]

 def get_parent_content_and_meta(vstore_docs, width, target):
-    target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
+    #target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
+    target_range = range(max(0, target), min(len(vstore_docs), target + width + 1)) # Now only selects extra passages AFTER the found passage
     parent_vstore_out = [vstore_docs[i] for i in target_range]

     content_str_out, meta_first_out, meta_last_out = [], [], []
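
The change above narrows the expansion window so that only the matched passage and the passages after it are returned, rather than passages on both sides. A quick illustration with a toy document list:

vstore_docs = ["p0", "p1", "p2", "p3", "p4", "p5"]
width, target = 2, 3

old_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
new_range = range(max(0, target), min(len(vstore_docs), target + width + 1))

print([vstore_docs[i] for i in old_range])  # ['p1', 'p2', 'p3', 'p4', 'p5'] (passages on either side)
print([vstore_docs[i] for i in new_range])  # ['p3', 'p4', 'p5'] (matched passage plus following ones)
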
chatfuncs/ingest.py CHANGED
@@ -38,7 +38,7 @@ from pypdf import PdfReader
 PandasDataFrame = TypeVar('pd.core.frame.DataFrame')
 # -

-split_strat = ["\n\n", "\n", ".", "!", "?", ","]
+split_strat = ["\n\n", "\n", ". ", "! ", "? "]
 chunk_size = 500
 chunk_overlap = 0
 start_index = True
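
The new split_strat drops the comma separator and only breaks on sentence punctuation followed by a space, so chunks are less likely to split inside decimals such as "3.1". The splitter call itself is not part of this diff; the sketch below assumes LangChain's RecursiveCharacterTextSplitter and uses a small chunk_size purely so the split is visible:

from langchain.text_splitter import RecursiveCharacterTextSplitter

split_strat = ["\n\n", "\n", ". ", "! ", "? "]

text_splitter = RecursiveCharacterTextSplitter(
    separators=split_strat,
    chunk_size=60,         # the app uses chunk_size = 500; 60 here just forces a visible split
    chunk_overlap=0,
    add_start_index=True,  # with create_documents, records each chunk's start position in its metadata
)

docs = text_splitter.create_documents([
    "Section 3.1 covers access. The building opened in 2021! Opening hours vary.\n\n"
    "Contact the desk for details."])
print([(d.metadata["start_index"], d.page_content) for d in docs])
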