Spaces:

datascientist22
/

rag-pdfQA-chatbot

Sleeping

App Files Files Community

datascientist22 committed on Sep 4, 2024

Commit

c1c7f8f

verified ·

1 Parent(s): 985d58e

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -26

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
 model = AutoModelForCausalLM.from_pretrained(
     "himmeow/vi-gemma-2b-RAG",
     device_map="auto",
-    torch_dtype=torch.bfloat16
 )
 # Use GPU if available
@@ -34,25 +34,21 @@ if st.sidebar.button("Submit"):
         pdf_text = ""
         with BytesIO(uploaded_file.read()) as file:
             reader = PdfReader(file)
-            for page_num in range(len(reader.pages)):
-                page = reader.pages[page_num]
                 text = page.extract_text()
                 pdf_text += text + "\n"
         # Define the prompt format for the model
-        prompt = """
-        {}
-        Please answer the question: {}
-        {}
         """
-        # Format the input text
-        input_text = prompt.format(pdf_text, query, " ")
-        # Encode the input text into input ids
-        input_ids = tokenizer(input_text, return_tensors="pt")
         # Use GPU for input ids if available
         if torch.cuda.is_available():
@@ -61,20 +57,10 @@ if st.sidebar.button("Submit"):
         # Generate text using the model
         outputs = model.generate(
             **input_ids,
-            max_new_tokens=500,  # Limit the number of tokens generated
-            no_repeat_ngram_size=5,  # Prevent repetition of 5-gram phrases
         )
         # Decode and display the results
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Remove unwanted text fields from the response
-        clean_response = response.replace("### Instruction and Input:", "").replace("### Response:", "").strip()
-        st.write(clean_response)
-    else:
-        st.sidebar.error("Please upload a PDF file and enter a query.")
-# Footer with LinkedIn link
-st.sidebar.write("---")
-st.sidebar.write("Created by: [Engr. Hamesh Raj](https://www.linkedin.com/in/datascientisthameshraj/)")

 model = AutoModelForCausalLM.from_pretrained(
     "himmeow/vi-gemma-2b-RAG",
     device_map="auto",
+    torch_dtype=torch.float16  # Use FP16 for faster computation if supported
 )
 # Use GPU if available
         pdf_text = ""
         with BytesIO(uploaded_file.read()) as file:
             reader = PdfReader(file)
+            for page in reader.pages:
                 text = page.extract_text()
                 pdf_text += text + "\n"
         # Define the prompt format for the model
+        prompt = f"""
+        {pdf_text}
+        Please answer the question: {query}
         """
+        # Truncate the prompt to the model's maximum input length (no chunking is performed)
+        max_input_length = 2048  # Adjust based on the model's max length
+        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
         # Use GPU for input ids if available
         if torch.cuda.is_available():
         # Generate text using the model
         outputs = model.generate(
             **input_ids,
+            max_new_tokens=250,  # Reduce the number of tokens generated for faster results
+            no_repeat_ngram_size=3,  # Prevent repetition
+            num_beams=2,  # Use beam search with fewer beams for faster results
         )
         # Decode and display the results
+        response = tokenizer.decode(outputs[0], skip_special