gagan3012 committed
Commit f49c162
1 parent: 01dffc6

Fixes and updates

Files changed (5)
  1. app.py +14 -1
  2. setup.py +1 -1
  3. src/data/process_data.py +9 -10
  4. src/models/predict_model.py +1 -1
  5. t5s/cli.py +13 -13
app.py CHANGED
@@ -1,6 +1,19 @@
 import streamlit as st
-from src.models.predict_model import predict_model
+import yaml
 
+from src.models.model import Summarization
+
+def predict_model(text: str):
+    """
+    Predict the summary of the given text.
+    """
+    with open("model_params.yml") as f:
+        params = yaml.safe_load(f)
+
+    model = Summarization()
+    model.load_model(model_type=params["model_type"], model_dir="gagan3012/summarsiation")
+    pre_summary = model.predict(text)
+    return pre_summary
 
 def visualize():
     st.write("# Summarization UI")
setup.py CHANGED
@@ -12,7 +12,7 @@ with open('requirements.txt') as f:
 setup(
     name='t5s',
     packages=find_packages(include=['t5s*']),
-    version='2.0.4',
+    version='2.0.5',
     description="T5 Summarisation Using Pytorch Lightning",
     license='MIT License',
     classifiers=[
src/data/process_data.py CHANGED
@@ -2,18 +2,17 @@ import pandas as pd
 import yaml
 
 
-def process_data(split="train"):
-
-    with open("data_params.yml") as f:
-        params = yaml.safe_load(f)
-
+def process_data(frac=0.1, split="train"):
     df = pd.read_csv("data/raw/{}.csv".format(split))
     df.columns = ["Unnamed: 0", "input_text", "output_text"]
-    df = df.sample(frac=params["split"], replace=True, random_state=1)
-    df.to_csv("data/processed/{}.csv".format(split))
+    df_new = df.sample(frac=frac, replace=True, random_state=1)
+    df_new.to_csv("data/processed/{}.csv".format(split))
 
 
 if __name__ == "__main__":
-    process_data(split="train")
-    process_data(split="test")
-    process_data(split="validation")
+    with open("data_params.yml") as f:
+        params = yaml.safe_load(f)
+
+    process_data(frac=params['split'], split="train")
+    process_data(frac=params['split'], split="test")
+    process_data(frac=params['split'], split="validation")
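
process_data() now receives the sampling fraction as an argument, and only the __main__ block reads data_params.yml, which makes the function usable without the config file. A minimal data_params.yml satisfying the new __main__ block needs only a split key; the sketch below generates one (the helper script name is hypothetical, and 0.001 simply mirrors the CLI default for --split):

# make_data_params.py (hypothetical helper, not part of the commit)
import yaml

with open("data_params.yml", "w") as f:
    # Only the "split" key is read by process_data.py's __main__ block.
    yaml.safe_dump({"split": 0.001}, f)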
src/models/predict_model.py CHANGED
@@ -11,6 +11,6 @@ def predict_model(text: str):
     params = yaml.safe_load(f)
 
     model = Summarization()
-    model.load_model(model_type=params["model_type"], model_dir="gagan3012/summarsiation")
+    model.load_model(model_type=params["model_type"], model_dir=params["model_dir"])
     pre_summary = model.predict(text)
     return pre_summary
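
With the hard-coded Hugging Face model directory replaced by params["model_dir"], model_params.yml must now supply that key alongside model_type. A minimal sketch of a compatible config, written from Python (the helper name and the exact key set are assumptions; "t5" and "gagan3012/summarsiation" are the defaults visible elsewhere in this commit):

# make_model_params.py (hypothetical helper, not part of the commit)
import yaml

params = {
    "model_type": "t5",                      # passed to Summarization.load_model
    "model_dir": "gagan3012/summarsiation",  # previously hard-coded in predict_model.py
}
with open("model_params.yml", "w") as f:
    yaml.safe_dump(params, f)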
t5s/cli.py CHANGED
@@ -22,16 +22,16 @@ parser_start.add_argument(
     "-d",
     "--dataset",
     default="cnn_dailymail",
-    help="Enter the name of the dataset to be used",type=str
+    help="Enter the name of the dataset to be used", type=str
 )
 
-parser_start.add_argument("-s", "--split", default=0.001, help="Enter the split required",type=float)
+parser_start.add_argument("-s", "--split", default=0.001, help="Enter the split required", type=float)
 
 parser_start.add_argument(
     "-n", "--name", default="summarsiation", help="Enter the name of the model"
 )
 parser_start.add_argument(
-    "-mt", "--model_type", default="t5", help="Enter the model type",type=str
+    "-mt", "--model_type", default="t5", help="Enter the model type", type=str
 )
 parser_start.add_argument(
     "-m",
@@ -113,25 +113,25 @@ class Run(object):
         elif arguments["command"] == "start":
             os.chdir("./summarization/")
             print("""
-            usage: t5s start [-h] [-d DATASET] [-s SPLIT] [-n NAME] [-mt MODEL_TYPE]
+            usage: t5s start [-h] [-d DATASET] [-s SPLIT] [-n NAME] [-mt MODEL_TYPE]
                              [-m MODEL_NAME] [-e EPOCHS] [-lr LEARNING_RATE]
                              [-b BATCH_SIZE]
 
-            -h, --help            show this help message and exit
-            -d DATASET, --dataset DATASET
+            -h, --help            show this help message and exit
+            -d DATASET, --dataset DATASET
                                   Enter the name of the dataset to be used
-            -s SPLIT, --split SPLIT
+            -s SPLIT, --split SPLIT
                                   Enter the split required
-            -n NAME, --name NAME  Enter the name of the model
-            -mt MODEL_TYPE, --model_type MODEL_TYPE
+            -n NAME, --name NAME  Enter the name of the model
+            -mt MODEL_TYPE, --model_type MODEL_TYPE
                                   Enter the model type
-            -m MODEL_NAME, --model_name MODEL_NAME
+            -m MODEL_NAME, --model_name MODEL_NAME
                                   Enter the model to be used eg t5-base
-            -e EPOCHS, --epochs EPOCHS
+            -e EPOCHS, --epochs EPOCHS
                                   Enter the number of epochs
-            -lr LEARNING_RATE, --learning-rate LEARNING_RATE
+            -lr LEARNING_RATE, --learning-rate LEARNING_RATE
                                   Enter the number of epochs
-            -b BATCH_SIZE, --batch-size BATCH_SIZE
+            -b BATCH_SIZE, --batch-size BATCH_SIZE
                                   Enter the number of batches
             """)
             start(arguments=arguments)
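
These cli.py edits appear to be formatting only: a space added after the comma before type= in the argparse calls and re-indentation of the printed usage text. Going by the flags and defaults shown in that usage message, a typical start run would look like:

t5s start -d cnn_dailymail -s 0.001 -n summarsiation -mt t5 -m t5-base

with -e, -lr and -b left for epochs, learning rate and batch size.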