tmnam20 committed
Commit 55ae524
1 Parent(s): 3b62d42
Files changed (2):
  1. app.py +11 -61
  2. st_utils.py +11 -7
app.py CHANGED
@@ -12,15 +12,15 @@ import torch
 # list_files(os.getcwd())
 
 # Set the title and description of the app
-st.title("Text Summarization App")
+st.title("Code Function Summarization App")
 st.write(
     """
     This app uses the Hugging Face transformers library to generate summaries of input text.
-    Simply select one of the sample Python functions from the dropdown menu below, and click the 'Summarize' button to generate a summary.
+    Simply select one of the sample Python functions from the dropdown menu below, and click the 'Summarize' button to generate a summary for the corresponding function.
     """
 )
 
-st.write(f"Has CUDA: {torch.cuda.is_available()}")
+# st.write(f"Has CUDA: {torch.cuda.is_available()}")
 
 # Download the model from the Hugging Face Hub if it doesn't exist
 download_model()
@@ -36,14 +36,13 @@ values = [
     "def search(data, target):\n    for i in range(len(data)):\n        if data[i] == target:\n            return i\n    return -1",
 ]
 
-st.subheader("Select a sample Python function:")
-selected_value = st.selectbox("", values)
+selected_value = st.selectbox("Select a sample Python function:", values)
 
 # Create a text input area for the user to enter their text
 text_input = st.text_area(
-    "Or enter your Python function here:",
-    height=300,
-    value=values[0],
+    "Or enter your Python function here (prioritize this over the dropdown menu):",
+    height=256,
+    value=selected_value,
 )
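Note on the hunk above: `value=selected_value` seeds the text area with the current dropdown choice, and the Summarize handler in the next hunk gives the free-form text priority over the dropdown. A minimal sketch of that precedence rule (a simplification of the diff, not code from the repo):

    def pick_source(text_input: str, selected_value: str) -> str:
        # the text area wins whenever it is non-empty;
        # otherwise fall back to the selected sample function
        return text_input if len(text_input) > 0 else selected_value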
@@ -56,63 +55,14 @@ def generate_summary(text):
 # When the user clicks the 'Summarize' button, generate a summary
 if st.button("Summarize") and (len(selected_value) > 0 or len(text_input) > 0):
     with st.spinner("Generating summary..."):
-        if len(selected_value) > 0:
-            summaries = generate_summary(selected_value)
+        if len(text_input) > 0:
+            summaries = generate_summary(text_input)
             st.subheader("Docstrings:")
             for i, summary in enumerate(summaries):
                 st.write(f"{i + 1}. " + summary)
+        # if len(selected_value) > 0:
         else:
-            summaries = generate_summary(text_input)
+            summaries = generate_summary(selected_value)
             st.subheader("Docstrings:")
             for i, summary in enumerate(summaries):
                 st.write(f"{i + 1}. " + summary)
-
-
-# import streamlit as st
-# from st_utils import load_tokenizer_and_model, generate_docstring, download_model
-
-# # Download the model from the Hugging Face Hub if it doesn't exist
-
-
-# # Set the title and description of the app
-# st.title("Text Summarization App")
-# st.write(
-#     """
-#     This app uses the Hugging Face transformers library to generate summaries of input text.
-#     Simply enter your text in the input area below, and click the 'Summarize' button to generate a summary.
-#     """
-# )
-
-# tokenizer, model, device = load_tokenizer_and_model("./models/pytorch_model.bin")
-
-# # Create a text input area for the user to enter their text
-# values = [
-#     "def multiply(a, b):\n    return a * b",
-#     "def get_data():\n    data = []\n    for i in range(10):\n        data.append(i)\n    return data",
-#     "def search(data, target):\n    for i in range(len(data)):\n        if data[i] == target:\n            return i\n    return -1",
-# ]
-
-# st.subheader("Enter your Python function here:")
-# text_input = st.text_area(
-#     "Input text here...",
-#     height=300,
-#     value=values[2],
-# )
-
-
-# # Define a function to generate a summary
-# def generate_summary(text):
-#     summary = generate_docstring(model, tokenizer, device, text, max_length=30)
-#     return summary
-
-
-# # When the user clicks the 'Summarize' button, generate a summary
-# if st.button("Summarize") and len(text_input) > 0:
-#     with st.spinner("Generating summary..."):
-#         # summary = generate_summary(text_input)
-#         # st.write("Summary:")
-#         # st.code(summary, language="text")
-#         summaries = generate_summary(text_input)
-#         st.subheader("Summary:")
-#         for i, summary in enumerate(summaries):
-#             st.write(f"{i + 1}. " + summary)
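Note: the UI enumerates `summaries`, so `generate_summary` evidently returns a list of candidate docstrings. A hedged sketch of how the underlying `generate_docstring` (defined in st_utils.py) might produce several candidates via beam search; the signature and generation parameters below are assumptions, not the repo's actual code:

    def generate_docstring(model, tokenizer, device, text, max_length=30, n_candidates=3):
        # encode the source function, then beam-search several candidate summaries
        inputs = tokenizer(text, return_tensors="pt", truncation=True).to(device)
        outputs = model.generate(
            **inputs,
            max_length=max_length,  # the app passes max_length=30
            num_beams=n_candidates,
            num_return_sequences=n_candidates,
        )
        return [tokenizer.decode(out, skip_special_tokens=True) for out in outputs]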
st_utils.py CHANGED
@@ -93,7 +93,7 @@ class CONFIG:
 
 
 # download model with streamlit cache decorator
-@st.cache(persist=False, show_spinner=True, allow_output_mutation=True)
+@st.cache_resource
 def download_model():
     if not os.path.exists(r"models/pytorch_model.bin"):
         os.makedirs("./models", exist_ok=True)
@@ -108,7 +108,8 @@ def download_model():
 
 
 # load with streamlit cache decorator
-@st.cache(persist=False, show_spinner=True, allow_output_mutation=True)
+# @st.cache(persist=False, show_spinner=True, allow_output_mutation=True)
+@st.cache_resource
 def load_tokenizer_and_model(pretrained_path):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
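Note on the two hunks above: `st.cache(persist=False, show_spinner=True, allow_output_mutation=True)` is deprecated in recent Streamlit releases. `st.cache_resource` is its replacement for unhashable, load-once objects such as models and tokenizers, while `st.cache_data` (applied to `preprocessing` further down) suits functions with serializable inputs and outputs. A minimal sketch of that split; the function bodies are placeholders, not this repo's code:

    import streamlit as st
    import torch

    @st.cache_resource  # one shared instance per process; the object is not copied on reruns
    def load_model():
        return torch.nn.Linear(4, 2)  # placeholder for the real tokenizer/model loading

    @st.cache_data  # arguments are hashed and a copy of the pickled result is returned
    def normalize(code_segment: str) -> str:
        return " ".join(code_segment.split())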
@@ -122,7 +123,7 @@ def load_tokenizer_and_model(pretrained_path):
         CONFIG.config_name if CONFIG.config_name else CONFIG.model_name_or_path,
         cache_dir=CONFIG.cache_dir,
     )
-    model_config.save_pretrained("config")
+    # model_config.save_pretrained("config")
 
     # load tokenizer
     tokenizer = tokenizer_class.from_pretrained(
@@ -159,28 +160,31 @@ def load_tokenizer_and_model(pretrained_path):
             map_location=device,
         )
     except RuntimeError as e:
+        print(e)
         try:
             state_dict = torch.load(
                 os.path.join(os.getcwd(), "models", "pytorch_model.bin"),
                 map_location="cpu",
             )
         except RuntimeError as e:
+            print(e)
             state_dict = torch.load(
                 os.path.join(os.getcwd(), "models", "pytorch_model_cpu.bin"),
                 map_location="cpu",
             )
+
+    del state_dict["encoder.embeddings.position_ids"]
     model.load_state_dict(state_dict)
 
-    model = model.to("cpu")
-    torch.save(
-        model.state_dict(), os.path.join(os.getcwd(), "models", "pytorch_model_cpu.bin")
-    )
+    # model = model.to("cpu")
+    # torch.save(model.state_dict(), os.path.join(os.getcwd(), "models", "pytorch_model_cpu.bin"))
 
     model = model.to(device)
 
     return tokenizer, model, device
 
 
+@st.cache_data
 def preprocessing(code_segment):
     # remove newlines
     code_segment = re.sub(r"\n", " ", code_segment)
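Note on the last hunk: `del state_dict["encoder.embeddings.position_ids"]` most likely works around a checkpoint saved with an older transformers release, in which `position_ids` was persisted in the state dict; with a newer release the key is unexpected and the default strict `load_state_dict` raises on it. A slightly more defensive variant (standard PyTorch API, not from this commit):

    # drop the stale buffer only if it is actually present
    state_dict.pop("encoder.embeddings.position_ids", None)
    # strict=False would also tolerate the extra key, at the cost of hiding other mismatches
    model.load_state_dict(state_dict, strict=False)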