Update app.py
Browse files"facebook/bart-large-cnn"
Release memory
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import streamlit as st
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer # Use AutoModelForSeq2SeqLM for BART
 import torch
@@ -28,19 +27,25 @@ def summarize_bart(article):
     )
 
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # --- Memory Release ---
+    del inputs
+    del outputs
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    # --- End Memory Release ---
+
     return summary
 
 def answer_question_bart(article, question):
     """Answers a question based on an article using the facebook/bart-large-cnn model."""
     # For Q&A with BART, concatenate the question and article with a separator
-    # Let's use a prompt format similar to what worked in the notebook tests for BART QA
     input_text = f"Answer the question based on the following article.\n\nArticle: {article}\n\nQuestion: {question}\n\nAnswer:"
 
     # Tokenize the input
     inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True, padding=True)
 
     # Generate the answer
-    # Adjust generation parameters as needed for Q&A
     outputs = model.generate(
         inputs["input_ids"],
         attention_mask=inputs["attention_mask"],
@@ -67,6 +72,14 @@ def answer_question_bart(article, question):
     if answer.startswith("Answer:"): # Handle cases where the model might repeat "Answer:"
         answer = answer[len("Answer:"):].strip()
 
+    # --- Memory Release ---
+    del inputs
+    del outputs
+    # del generated_text # Be careful deleting generated_text if you need to return it
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    # --- End Memory Release ---
+
 
     return answer
 
@@ -103,5 +116,4 @@ if st.button("Process"):
         st.warning("Please provide an article to answer the question from.")
     elif not question_input:
         st.warning("Please provide a question to answer.")
-
 
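The change is the same in both functions: decode the generated tensor first, then drop the local references and flush PyTorch's CUDA cache so the memory goes back to the driver between Streamlit reruns. A minimal sketch of that pattern, factored into a single helper (the name generate_and_release is hypothetical, not part of this commit), assuming model and tokenizer are the app's already-loaded facebook/bart-large-cnn objects:

import torch

def generate_and_release(model, tokenizer, text, **gen_kwargs):
    """Tokenize, generate, decode, then release the intermediate tensors."""
    inputs = tokenizer(text, return_tensors="pt", max_length=1024,
                       truncation=True, padding=True)
    outputs = model.generate(inputs["input_ids"],
                             attention_mask=inputs["attention_mask"],
                             **gen_kwargs)
    # Decode before dropping the reference to `outputs`.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # `del` only removes the local names; the tensors are freed once no
    # reference survives. empty_cache() then hands the freed blocks back
    # from PyTorch's caching allocator to the CUDA driver.
    del inputs, outputs
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return decoded

Note that empty_cache() does not speed up the current process; it matters here because a Streamlit script re-runs on every interaction and the GPU may be shared with other processes.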
|
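For context, a sketch of how the two call sites would use such a helper. The generation parameters shown (max_length, num_beams) are illustrative assumptions: the hunks above truncate before the full generate(...) argument lists, so the commit's actual values are not visible here.

# Hypothetical usage; `article` and `question` come from the Streamlit inputs.
summary = generate_and_release(model, tokenizer, article,
                               max_length=150, min_length=40, num_beams=4)

qa_prompt = (
    "Answer the question based on the following article.\n\n"
    f"Article: {article}\n\nQuestion: {question}\n\nAnswer:"
)
answer = generate_and_release(model, tokenizer, qa_prompt, max_length=64)
if answer.startswith("Answer:"):
    # The model sometimes echoes the prompt's trailing "Answer:" label.
    answer = answer[len("Answer:"):].strip()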