Jacaranda committed on
Commit
841bca9
1 Parent(s): 6de6e27

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -19
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  from tqdm import tqdm
4
  from facility_predict import Preprocess, Facility_Model, obj_Facility_Model, processor
5
 
 
6
  def predict_batch_from_csv(input_file, output_file):
7
  # Load batch data from CSV
8
  batch_data = pd.read_csv(input_file)
@@ -12,31 +13,44 @@ def predict_batch_from_csv(input_file, output_file):
12
 
13
  # Iterate over rows with tqdm for progress tracking
14
  for _, row in tqdm(batch_data.iterrows(), total=len(batch_data)):
15
- text = row['facility_name'] # Replace 'facility_name' with the actual column name containing the text data
16
- cleaned_text = processor.clean_text(text)
 
 
 
 
 
17
  prepared_data = processor.process_tokenizer(cleaned_text)
18
- prediction = obj_Facility_Model.inference(prepared_data)
 
 
 
 
 
19
  predictions.append(prediction)
20
 
21
  # Create DataFrame for predictions
22
  output_data = pd.DataFrame({'prediction': predictions})
 
23
  # Merge with input DataFrame
24
- pred_output_df = pd.concat([batch_data, output_data], axis=1)
 
25
  # Save predictions to CSV
26
  pred_output_df.to_csv(output_file, index=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- def predict_batch(input_csv, output_csv):
29
- predict_batch_from_csv(input_csv, output_csv)
30
- return "Prediction completed. Results saved to " + output_csv
31
-
32
- iface = gr.Interface(
33
- fn=predict_batch,
34
- inputs=["file", "text"],
35
- outputs="text",
36
- title="Batch Facility Name Prediction",
37
- description="Upload a CSV file with facility names and get the predictions in a CSV file",
38
- #examples=[["input.csv", "output.csv"]],
39
- )
40
-
41
- if __name__ == "__main__":
42
- iface.launch()
 
3
  from tqdm import tqdm
4
  from facility_predict import Preprocess, Facility_Model, obj_Facility_Model, processor
5
 
6
+
7
  def predict_batch_from_csv(input_file, output_file):
8
  # Load batch data from CSV
9
  batch_data = pd.read_csv(input_file)
 
13
 
14
  # Iterate over rows with tqdm for progress tracking
15
  for _, row in tqdm(batch_data.iterrows(), total=len(batch_data)):
16
+ text = row['pnc_fac_name'] # Replace 'facility_name' with the actual column name containing the text data
17
+
18
+ if pd.isnull(text):
19
+ cleaned_text = ""
20
+ else:
21
+ cleaned_text = processor.clean_text(text)
22
+
23
  prepared_data = processor.process_tokenizer(cleaned_text)
24
+
25
+ if cleaned_text == "":
26
+ prediction = "" # Set prediction as empty string
27
+ else:
28
+ prediction = obj_Facility_Model.inference(prepared_data)
29
+
30
  predictions.append(prediction)
31
 
32
  # Create DataFrame for predictions
33
  output_data = pd.DataFrame({'prediction': predictions})
34
+
35
  # Merge with input DataFrame
36
+ pred_output_df = pd.concat([batch_data.reset_index(drop=True), output_data], axis=1)
37
+
38
  # Save predictions to CSV
39
  pred_output_df.to_csv(output_file, index=False)
40
+ return "Prediction completed. Results saved to " + output_file
41
+
42
# Define the Gradio input and output components.
# NOTE(review): the legacy gr.inputs.File constructor takes no `accept`
# keyword, and gr.outputs.File takes only `label`; passing `accept`, `type`
# or `default` raises TypeError when the module loads, so those arguments are
# dropped here. Confirm against the installed Gradio version.
input_csv = gr.inputs.File(label="Input CSV", type="file")
output_csv = gr.outputs.File(label="Output CSV")
45
+
46
def predict_interface(input_file):
    """Run batch prediction on an uploaded CSV and return the result path.

    Args:
        input_file: Upload object supplied by the Gradio file input. Only its
            ``name`` attribute is read; it is passed on as the input CSV path.

    Returns:
        The path handed to ``predict_batch_from_csv`` as its output file,
        which is always ``"./output.csv"``.
    """
    uploaded_path = input_file.name
    result_path = "./output.csv"
    # Whatever predict_batch_from_csv returns is ignored; the interface hands
    # back the path of the written CSV instead.
    predict_batch_from_csv(uploaded_path, result_path)
    return result_path
51
+
52
# Connect the interface with the prediction function
iface = gr.Interface(
    fn=predict_interface,
    inputs=input_csv,
    outputs=output_csv,
    title="CSV Batch Prediction",
)

# Launch only when run as a script, so that importing this module (for
# example from a test or another tool) does not start the web server.
if __name__ == "__main__":
    iface.launch()