Spaces:

datascientist22
/

rag-pdfQA-chatbot

Sleeping

App Files Files Community

datascientist22 committed on Sep 4, 2024

Commit

72b04bc

verified ·

1 Parent(s): c1c7f8f

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -23

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
 model = AutoModelForCausalLM.from_pretrained(
     "himmeow/vi-gemma-2b-RAG",
     device_map="auto",
-    torch_dtype=torch.float16  # Use FP16 for faster computation if supported
 )
 # Use GPU if available
@@ -34,33 +34,51 @@ if st.sidebar.button("Submit"):
         pdf_text = ""
         with BytesIO(uploaded_file.read()) as file:
             reader = PdfReader(file)
-            for page in reader.pages:
                 text = page.extract_text()
                 pdf_text += text + "\n"
-        # Define the prompt format for the model
-        prompt = f"""
-        {pdf_text}
-        Please answer the question: {query}
-        """
-        # Break the text into chunks if it's too long for the model
-        max_input_length = 2048  # Adjust based on the model's max length
-        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
-        # Use GPU for input ids if available
-        if torch.cuda.is_available():
-            input_ids = input_ids.to("cuda")
-        # Generate text using the model
-        outputs = model.generate(
-            **input_ids,
-            max_new_tokens=250,  # Reduce the number of tokens generated for faster results
-            no_repeat_ngram_size=3,  # Prevent repetition
-            num_beams=2,  # Use beam search with fewer beams for faster results
-        )
-        # Decode and display the results
-        response = tokenizer.decode(outputs[0], skip_special

 model = AutoModelForCausalLM.from_pretrained(
     "himmeow/vi-gemma-2b-RAG",
     device_map="auto",
+    torch_dtype=torch.bfloat16
 )
 # Use GPU if available
         pdf_text = ""
         with BytesIO(uploaded_file.read()) as file:
             reader = PdfReader(file)
+            for page_num in range(len(reader.pages)):
+                page = reader.pages[page_num]
                 text = page.extract_text()
                 pdf_text += text + "\n"
+        # Chunk the text to fit within model limits
+        max_chunk_size = 2000  # Adjust as needed for your model's token limit
+        chunks = [pdf_text[i:i + max_chunk_size] for i in range(0, len(pdf_text), max_chunk_size)]
+        responses = []
+        for chunk in chunks:
+            prompt = f"""
+            {chunk}
+            Please answer the question: {query}
+            """
+            # Encode the input text into input ids
+            input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
+            # Use GPU for input ids if available
+            if torch.cuda.is_available():
+                input_ids = input_ids.to("cuda")
+            # Generate text using the model
+            outputs = model.generate(
+                **input_ids,
+                max_new_tokens=250,  # Reduce the number of tokens generated
+                no_repeat_ngram_size=3,  # Adjust for faster generation
+                num_beams=2,  # Use beam search with fewer beams for faster results
+            )
+            # Decode and store the response
+            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            responses.append(response)
+        # Combine responses and display them
+        combined_response = "\n".join(responses)
+        clean_response = combined_response.replace("### Instruction and Input:", "").replace("### Response:", "").strip()
+        st.write(clean_response)
+    else:
+        st.sidebar.error("Please upload a PDF file and enter a query.")
+# Footer with LinkedIn link
+st.sidebar.write("---")
+st.sidebar.write("Created by: [Engr. Hamesh Raj](https://www.linkedin.com/in/datascientisthameshraj/)")