Spaces:
Runtime error
Runtime error
Fixes and updates
Browse files
- app.py +14 -1
- setup.py +1 -1
- src/data/process_data.py +9 -10
- src/models/predict_model.py +1 -1
- t5s/cli.py +13 -13
app.py
CHANGED
@@ -1,6 +1,19 @@
|
|
1 |
import streamlit as st
|
2 |
-
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
def visualize():
|
6 |
st.write("# Summarization UI")
|
|
|
1 |
import streamlit as st
|
2 |
+
import yaml
|
3 |
|
4 |
+
from src.models.model import Summarization
|
5 |
+
|
6 |
+
def predict_model(text: str):
|
7 |
+
"""
|
8 |
+
Predict the summary of the given text.
|
9 |
+
"""
|
10 |
+
with open("model_params.yml") as f:
|
11 |
+
params = yaml.safe_load(f)
|
12 |
+
|
13 |
+
model = Summarization()
|
14 |
+
model.load_model(model_type=params["model_type"], model_dir="gagan3012/summarsiation")
|
15 |
+
pre_summary = model.predict(text)
|
16 |
+
return pre_summary
|
17 |
|
18 |
def visualize():
|
19 |
st.write("# Summarization UI")
|
setup.py
CHANGED
@@ -12,7 +12,7 @@ with open('requirements.txt') as f:
|
|
12 |
setup(
|
13 |
name='t5s',
|
14 |
packages=find_packages(include=['t5s*']),
|
15 |
-
version='2.0.
|
16 |
description="T5 Summarisation Using Pytorch Lightning",
|
17 |
license='MIT License',
|
18 |
classifiers=[
|
|
|
12 |
setup(
|
13 |
name='t5s',
|
14 |
packages=find_packages(include=['t5s*']),
|
15 |
+
version='2.0.5',
|
16 |
description="T5 Summarisation Using Pytorch Lightning",
|
17 |
license='MIT License',
|
18 |
classifiers=[
|
src/data/process_data.py
CHANGED
@@ -2,18 +2,17 @@ import pandas as pd
|
|
2 |
import yaml
|
3 |
|
4 |
|
5 |
-
def process_data(split="train"):
|
6 |
-
|
7 |
-
with open("data_params.yml") as f:
|
8 |
-
params = yaml.safe_load(f)
|
9 |
-
|
10 |
df = pd.read_csv("data/raw/{}.csv".format(split))
|
11 |
df.columns = ["Unnamed: 0", "input_text", "output_text"]
|
12 |
-
|
13 |
-
|
14 |
|
15 |
|
16 |
if __name__ == "__main__":
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
2 |
import yaml
|
3 |
|
4 |
|
5 |
+
def process_data(frac=0.1, split="train"):
|
|
|
|
|
|
|
|
|
6 |
df = pd.read_csv("data/raw/{}.csv".format(split))
|
7 |
df.columns = ["Unnamed: 0", "input_text", "output_text"]
|
8 |
+
df_new = df.sample(frac=frac, replace=True, random_state=1)
|
9 |
+
df_new.to_csv("data/processed/{}.csv".format(split))
|
10 |
|
11 |
|
12 |
if __name__ == "__main__":
|
13 |
+
with open("data_params.yml") as f:
|
14 |
+
params = yaml.safe_load(f)
|
15 |
+
|
16 |
+
process_data(frac=params['split'], split="train")
|
17 |
+
process_data(frac=params['split'], split="test")
|
18 |
+
process_data(frac=params['split'], split="validation")
|
src/models/predict_model.py
CHANGED
@@ -11,6 +11,6 @@ def predict_model(text: str):
|
|
11 |
params = yaml.safe_load(f)
|
12 |
|
13 |
model = Summarization()
|
14 |
-
model.load_model(model_type=params["model_type"], model_dir="
|
15 |
pre_summary = model.predict(text)
|
16 |
return pre_summary
|
|
|
11 |
params = yaml.safe_load(f)
|
12 |
|
13 |
model = Summarization()
|
14 |
+
model.load_model(model_type=params["model_type"], model_dir=params["model_dir"])
|
15 |
pre_summary = model.predict(text)
|
16 |
return pre_summary
|
t5s/cli.py
CHANGED
@@ -22,16 +22,16 @@ parser_start.add_argument(
|
|
22 |
"-d",
|
23 |
"--dataset",
|
24 |
default="cnn_dailymail",
|
25 |
-
help="Enter the name of the dataset to be used",type=str
|
26 |
)
|
27 |
|
28 |
-
parser_start.add_argument("-s", "--split", default=0.001, help="Enter the split required",type=float)
|
29 |
|
30 |
parser_start.add_argument(
|
31 |
"-n", "--name", default="summarsiation", help="Enter the name of the model"
|
32 |
)
|
33 |
parser_start.add_argument(
|
34 |
-
"-mt", "--model_type", default="t5", help="Enter the model type",type=str
|
35 |
)
|
36 |
parser_start.add_argument(
|
37 |
"-m",
|
@@ -113,25 +113,25 @@ class Run(object):
|
|
113 |
elif arguments["command"] == "start":
|
114 |
os.chdir("./summarization/")
|
115 |
print("""
|
116 |
-
|
117 |
[-m MODEL_NAME] [-e EPOCHS] [-lr LEARNING_RATE]
|
118 |
[-b BATCH_SIZE]
|
119 |
|
120 |
-
|
121 |
-
|
122 |
Enter the name of the dataset to be used
|
123 |
-
|
124 |
Enter the split required
|
125 |
-
|
126 |
-
|
127 |
Enter the model type
|
128 |
-
|
129 |
Enter the model to be used eg t5-base
|
130 |
-
|
131 |
Enter the number of epochs
|
132 |
-
|
133 |
Enter the number of epochs
|
134 |
-
|
135 |
Enter the number of batches
|
136 |
""")
|
137 |
start(arguments=arguments)
|
|
|
22 |
"-d",
|
23 |
"--dataset",
|
24 |
default="cnn_dailymail",
|
25 |
+
help="Enter the name of the dataset to be used", type=str
|
26 |
)
|
27 |
|
28 |
+
parser_start.add_argument("-s", "--split", default=0.001, help="Enter the split required", type=float)
|
29 |
|
30 |
parser_start.add_argument(
|
31 |
"-n", "--name", default="summarsiation", help="Enter the name of the model"
|
32 |
)
|
33 |
parser_start.add_argument(
|
34 |
+
"-mt", "--model_type", default="t5", help="Enter the model type", type=str
|
35 |
)
|
36 |
parser_start.add_argument(
|
37 |
"-m",
|
|
|
113 |
elif arguments["command"] == "start":
|
114 |
os.chdir("./summarization/")
|
115 |
print("""
|
116 |
+
usage: t5s start [-h] [-d DATASET] [-s SPLIT] [-n NAME] [-mt MODEL_TYPE]
|
117 |
[-m MODEL_NAME] [-e EPOCHS] [-lr LEARNING_RATE]
|
118 |
[-b BATCH_SIZE]
|
119 |
|
120 |
+
-h, --help show this help message and exit
|
121 |
+
-d DATASET, --dataset DATASET
|
122 |
Enter the name of the dataset to be used
|
123 |
+
-s SPLIT, --split SPLIT
|
124 |
Enter the split required
|
125 |
+
-n NAME, --name NAME Enter the name of the model
|
126 |
+
-mt MODEL_TYPE, --model_type MODEL_TYPE
|
127 |
Enter the model type
|
128 |
+
-m MODEL_NAME, --model_name MODEL_NAME
|
129 |
Enter the model to be used eg t5-base
|
130 |
+
-e EPOCHS, --epochs EPOCHS
|
131 |
Enter the number of epochs
|
132 |
+
-lr LEARNING_RATE, --learning-rate LEARNING_RATE
|
133 |
Enter the number of epochs
|
134 |
+
-b BATCH_SIZE, --batch-size BATCH_SIZE
|
135 |
Enter the number of batches
|
136 |
""")
|
137 |
start(arguments=arguments)
|