gagan3012 committed
Commit f49c162
1 parent: 01dffc6

Fixes and updates

Files changed (5)
  1. app.py +14 -1
  2. setup.py +1 -1
  3. src/data/process_data.py +9 -10
  4. src/models/predict_model.py +1 -1
  5. t5s/cli.py +13 -13
app.py CHANGED
@@ -1,6 +1,19 @@
 import streamlit as st
-from src.models.predict_model import predict_model
+import yaml
 
+from src.models.model import Summarization
+
+def predict_model(text: str):
+    """
+    Predict the summary of the given text.
+    """
+    with open("model_params.yml") as f:
+        params = yaml.safe_load(f)
+
+    model = Summarization()
+    model.load_model(model_type=params["model_type"], model_dir="gagan3012/summarsiation")
+    pre_summary = model.predict(text)
+    return pre_summary
 
 def visualize():
     st.write("# Summarization UI")
setup.py CHANGED
@@ -12,7 +12,7 @@ with open('requirements.txt') as f:
 setup(
     name='t5s',
     packages=find_packages(include=['t5s*']),
-    version='2.0.4',
+    version='2.0.5',
     description="T5 Summarisation Using Pytorch Lightning",
     license='MIT License',
     classifiers=[
src/data/process_data.py CHANGED
@@ -2,18 +2,17 @@ import pandas as pd
 import yaml
 
 
-def process_data(split="train"):
-
-    with open("data_params.yml") as f:
-        params = yaml.safe_load(f)
-
+def process_data(frac=0.1, split="train"):
     df = pd.read_csv("data/raw/{}.csv".format(split))
     df.columns = ["Unnamed: 0", "input_text", "output_text"]
-    df = df.sample(frac=params["split"], replace=True, random_state=1)
-    df.to_csv("data/processed/{}.csv".format(split))
+    df_new = df.sample(frac=frac, replace=True, random_state=1)
+    df_new.to_csv("data/processed/{}.csv".format(split))
 
 
 if __name__ == "__main__":
-    process_data(split="train")
-    process_data(split="test")
-    process_data(split="validation")
+    with open("data_params.yml") as f:
+        params = yaml.safe_load(f)
+
+    process_data(frac=params['split'], split="train")
+    process_data(frac=params['split'], split="test")
+    process_data(frac=params['split'], split="validation")
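
process_data() now receives the sampling fraction as an argument, and only the __main__ block reads data_params.yml, which makes the function usable without the config file. A minimal data_params.yml satisfying the new __main__ block needs only a split key; the sketch below generates one (the helper script name is hypothetical, and 0.001 simply mirrors the CLI default for --split):

# make_data_params.py (hypothetical helper, not part of the commit)
import yaml

with open("data_params.yml", "w") as f:
    # Only the "split" key is read by process_data.py's __main__ block.
    yaml.safe_dump({"split": 0.001}, f)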
src/models/predict_model.py CHANGED
@@ -11,6 +11,6 @@ def predict_model(text: str):
     params = yaml.safe_load(f)
 
     model = Summarization()
-    model.load_model(model_type=params["model_type"], model_dir="gagan3012/summarsiation")
+    model.load_model(model_type=params["model_type"], model_dir=params["model_dir"])
     pre_summary = model.predict(text)
     return pre_summary
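
With the hard-coded Hugging Face model directory replaced by params["model_dir"], model_params.yml must now supply that key alongside model_type. A minimal sketch of a compatible config, written from Python (the helper name and the exact key set are assumptions; "t5" and "gagan3012/summarsiation" are the defaults visible elsewhere in this commit):

# make_model_params.py (hypothetical helper, not part of the commit)
import yaml

params = {
    "model_type": "t5",                      # passed to Summarization.load_model
    "model_dir": "gagan3012/summarsiation",  # previously hard-coded in predict_model.py
}
with open("model_params.yml", "w") as f:
    yaml.safe_dump(params, f)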
t5s/cli.py CHANGED
@@ -22,16 +22,16 @@ parser_start.add_argument(
     "-d",
     "--dataset",
     default="cnn_dailymail",
-    help="Enter the name of the dataset to be used",type=str
+    help="Enter the name of the dataset to be used", type=str
 )
 
-parser_start.add_argument("-s", "--split", default=0.001, help="Enter the split required",type=float)
+parser_start.add_argument("-s", "--split", default=0.001, help="Enter the split required", type=float)
 
 parser_start.add_argument(
     "-n", "--name", default="summarsiation", help="Enter the name of the model"
 )
 parser_start.add_argument(
-    "-mt", "--model_type", default="t5", help="Enter the model type",type=str
+    "-mt", "--model_type", default="t5", help="Enter the model type", type=str
 )
 parser_start.add_argument(
     "-m",
@@ -113,25 +113,25 @@ class Run(object):
         elif arguments["command"] == "start":
             os.chdir("./summarization/")
             print("""
-            usage: t5s start [-h] [-d DATASET] [-s SPLIT] [-n NAME] [-mt MODEL_TYPE]
+            usage: t5s start [-h] [-d DATASET] [-s SPLIT] [-n NAME] [-mt MODEL_TYPE]
                              [-m MODEL_NAME] [-e EPOCHS] [-lr LEARNING_RATE]
                              [-b BATCH_SIZE]
 
-            -h, --help            show this help message and exit
-            -d DATASET, --dataset DATASET
+            -h, --help            show this help message and exit
+            -d DATASET, --dataset DATASET
                                   Enter the name of the dataset to be used
-            -s SPLIT, --split SPLIT
+            -s SPLIT, --split SPLIT
                                   Enter the split required
-            -n NAME, --name NAME  Enter the name of the model
-            -mt MODEL_TYPE, --model_type MODEL_TYPE
+            -n NAME, --name NAME  Enter the name of the model
+            -mt MODEL_TYPE, --model_type MODEL_TYPE
                                   Enter the model type
-            -m MODEL_NAME, --model_name MODEL_NAME
+            -m MODEL_NAME, --model_name MODEL_NAME
                                   Enter the model to be used eg t5-base
-            -e EPOCHS, --epochs EPOCHS
+            -e EPOCHS, --epochs EPOCHS
                                   Enter the number of epochs
-            -lr LEARNING_RATE, --learning-rate LEARNING_RATE
+            -lr LEARNING_RATE, --learning-rate LEARNING_RATE
                                   Enter the number of epochs
-            -b BATCH_SIZE, --batch-size BATCH_SIZE
+            -b BATCH_SIZE, --batch-size BATCH_SIZE
                                   Enter the number of batches
             """)
             start(arguments=arguments)
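
These cli.py edits appear to be formatting only: a space added after the comma before type= in the argparse calls and re-indentation of the printed usage text. Going by the flags and defaults shown in that usage message, a typical start run would look like:

t5s start -d cnn_dailymail -s 0.001 -n summarsiation -mt t5 -m t5-base

with -e, -lr and -b left for epochs, learning rate and batch size.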