shivi committed on
Commit
684811b
•
1 Parent(s): 563baab

added final app setup

Browse files
app.py CHANGED
@@ -1,13 +1,14 @@
1
  import gradio as gr
2
- from .constants import CSV_HEADER, NUMERIC_FEATURE_NAMES, CATEGORICAL_FEATURES_WITH_VOCABULARY, NUMBER_INPUT_COLS
3
- from .preprocess import create_max_values_map, create_dropdown_default_values_map, create_sample_test_data
4
- from .predict import batch_predict, user_input_predict
5
 
6
  inputs_list = []
7
  max_values_map = create_max_values_map()
8
  dropdown_default_values_map = create_dropdown_default_values_map()
9
  sample_input_df_val = create_sample_test_data()
10
 
 
11
  demo = gr.Blocks()
12
 
13
  with demo:
@@ -20,7 +21,7 @@ with demo:
20
 
21
  with gr.TabItem("Predict using batch of inputs"):
22
  gr.Markdown("**Input DataFrame** \n")
23
- input_df = gr.Dataframe(headers=CSV_HEADER,value=samp,)
24
  gr.Markdown("**Output DataFrame** \n")
25
  output_df = gr.Dataframe()
26
  gr.Markdown("**Make Predictions**")
@@ -39,7 +40,7 @@ with demo:
39
  else:
40
  curr_max_val = max_values_map["max_"+num_variable]
41
  numeric_input = gr.Slider(0,curr_max_val, label=num_variable,step=1)
42
- inputs_list.append(numeric_input)
43
 
44
  with gr.TabItem("Categorical Inputs"):
45
  gr.Markdown("Choose values for categorical inputs here.")
 
1
  import gradio as gr
2
+ from utils.constants import CSV_HEADER, NUMERIC_FEATURE_NAMES, NUMBER_INPUT_COLS
3
+ from utils.preprocess import create_max_values_map, create_dropdown_default_values_map, create_sample_test_data, CATEGORICAL_FEATURES_WITH_VOCABULARY
4
+ from utils.predict import batch_predict, user_input_predict
5
 
6
  inputs_list = []
7
  max_values_map = create_max_values_map()
8
  dropdown_default_values_map = create_dropdown_default_values_map()
9
  sample_input_df_val = create_sample_test_data()
10
 
11
+
12
  demo = gr.Blocks()
13
 
14
  with demo:
 
21
 
22
  with gr.TabItem("Predict using batch of inputs"):
23
  gr.Markdown("**Input DataFrame** \n")
24
+ input_df = gr.Dataframe(headers=CSV_HEADER,value=sample_input_df_val,)
25
  gr.Markdown("**Output DataFrame** \n")
26
  output_df = gr.Dataframe()
27
  gr.Markdown("**Make Predictions**")
 
40
  else:
41
  curr_max_val = max_values_map["max_"+num_variable]
42
  numeric_input = gr.Slider(0,curr_max_val, label=num_variable,step=1)
43
+ inputs_list.append(numeric_input)
44
 
45
  with gr.TabItem("Categorical Inputs"):
46
  gr.Markdown("Choose values for categorical inputs here.")
constants.py → utils/constants.py RENAMED
@@ -1,6 +1,3 @@
1
- import pandas as pd
2
- from .preprocess import load_test_data
3
-
4
  # Column names.
5
  CSV_HEADER = [
6
  "age",
@@ -67,22 +64,4 @@ NUMERIC_FEATURE_NAMES = [
67
  ##Cols which will use "Number" component of gradio for taking user input
68
  NUMBER_INPUT_COLS = ['age', 'num_persons_worked_for_employer','weeks_worked_in_year']
69
 
70
- test_data = load_test_data()
71
 
72
- CATEGORICAL_FEATURES_WITH_VOCABULARY = {
73
- feature_name: sorted([str(value) for value in list(test_data[feature_name].unique())])
74
- for feature_name in CSV_HEADER
75
- if feature_name
76
- not in list(NUMERIC_FEATURE_NAMES + [WEIGHT_COLUMN_NAME, TARGET_FEATURE_NAME])
77
- }
78
- # All features names.
79
- FEATURE_NAMES = NUMERIC_FEATURE_NAMES + list(
80
- CATEGORICAL_FEATURES_WITH_VOCABULARY.keys()
81
- )
82
- # Feature default values.
83
- COLUMN_DEFAULTS = [
84
- [0.0]
85
- if feature_name in NUMERIC_FEATURE_NAMES + [TARGET_FEATURE_NAME, WEIGHT_COLUMN_NAME]
86
- else ["NA"]
87
- for feature_name in CSV_HEADER
88
- ]
 
 
 
 
1
  # Column names.
2
  CSV_HEADER = [
3
  "age",
 
64
  ##Cols which will use "Number" component of gradio for taking user input
65
  NUMBER_INPUT_COLS = ['age', 'num_persons_worked_for_employer','weeks_worked_in_year']
66
 
 
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
predict.py → utils/predict.py RENAMED
@@ -12,7 +12,7 @@ def batch_predict(input_data):
12
  1. prediction probability for each class
13
  2. actual expected outcome for each entry in the input dataframe
14
  """
15
- input_data_file = "prod_data.csv"
16
  labels = ['Probability of Income greater than 50000',"Probability of Income less than 50000","Actual Income"]
17
 
18
  predictions_df = pd.DataFrame(columns=labels)
 
12
  1. prediction probability for each class
13
  2. actual expected outcome for each entry in the input dataframe
14
  """
15
+ input_data_file = "input_data.csv"
16
  labels = ['Probability of Income greater than 50000',"Probability of Income less than 50000","Actual Income"]
17
 
18
  predictions_df = pd.DataFrame(columns=labels)
preprocess.py → utils/preprocess.py RENAMED
@@ -1,10 +1,37 @@
1
  import tensorflow as tf
2
  import pandas as pd
3
- from .constants import CSV_HEADER, TARGET_FEATURE_NAME, WEIGHT_COLUMN_NAME, NUMERIC_FEATURE_NAMES, COLUMN_DEFAULTS, CATEGORICAL_FEATURES_WITH_VOCABULARY
4
 
5
 
6
  ##Helper functions for preprocessing of data:
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def process(features, target):
9
  for feature_name in features:
10
  if feature_name in CATEGORICAL_FEATURES_WITH_VOCABULARY:
@@ -44,22 +71,14 @@ def create_dropdown_default_values_map():
44
  dropdown_default_values_map["max_"+col] = max_val
45
  return dropdown_default_values_map
46
 
47
- def load_test_data():
48
-
49
- test_data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census-income.test.gz"
50
- test_data = pd.read_csv(test_data_url, header=None, names=CSV_HEADER)
51
-
52
- return test_data
53
-
54
  def create_sample_test_data():
55
 
56
- test_data = load_test_data()
57
-
58
  test_data["income_level"] = test_data["income_level"].apply(
59
  lambda x: 0 if x == " - 50000." else 1)
60
 
61
  sample_df = test_data.loc[:20,:]
62
- sample_df_values = samp.values.tolist()
63
 
64
  return sample_df_values
65
-
 
 
1
  import tensorflow as tf
2
  import pandas as pd
3
+ from .constants import CSV_HEADER, TARGET_FEATURE_NAME, WEIGHT_COLUMN_NAME, NUMERIC_FEATURE_NAMES
4
 
5
 
6
  ##Helper functions for preprocessing of data:
7
 
8
+ def load_test_data():
9
+
10
+ test_data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census-income.test.gz"
11
+ test_data = pd.read_csv(test_data_url, header=None, names=CSV_HEADER)
12
+
13
+ return test_data
14
+
15
+ test_data = load_test_data()
16
+
17
+ CATEGORICAL_FEATURES_WITH_VOCABULARY = {
18
+ feature_name: sorted([str(value) for value in list(test_data[feature_name].unique())])
19
+ for feature_name in CSV_HEADER
20
+ if feature_name
21
+ not in list(NUMERIC_FEATURE_NAMES + [WEIGHT_COLUMN_NAME, TARGET_FEATURE_NAME])
22
+ }
23
+ # All features names.
24
+ FEATURE_NAMES = NUMERIC_FEATURE_NAMES + list(
25
+ CATEGORICAL_FEATURES_WITH_VOCABULARY.keys()
26
+ )
27
+ # Feature default values.
28
+ COLUMN_DEFAULTS = [
29
+ [0.0]
30
+ if feature_name in NUMERIC_FEATURE_NAMES + [TARGET_FEATURE_NAME, WEIGHT_COLUMN_NAME]
31
+ else ["NA"]
32
+ for feature_name in CSV_HEADER
33
+ ]
34
+
35
  def process(features, target):
36
  for feature_name in features:
37
  if feature_name in CATEGORICAL_FEATURES_WITH_VOCABULARY:
 
71
  dropdown_default_values_map["max_"+col] = max_val
72
  return dropdown_default_values_map
73
 
 
 
 
 
 
 
 
74
  def create_sample_test_data():
75
 
 
 
76
  test_data["income_level"] = test_data["income_level"].apply(
77
  lambda x: 0 if x == " - 50000." else 1)
78
 
79
  sample_df = test_data.loc[:20,:]
80
+ sample_df_values = sample_df.values.tolist()
81
 
82
  return sample_df_values
83
+
84
+