Spaces:

nhosseini
/

AITableQA

Sleeping

App Files Files Community

nhosseini committed on Oct 20

Commit

b94eb4c

•

1 Parent(s): 81034d0

Update app.py

Browse files

update preprocess

Files changed (1) hide show

app.py +7 -9

app.py CHANGED Viewed

@@ -46,7 +46,7 @@ def process_table_query(query, table_data):
         # If the results are not numerical, return the joined string
         return ' '.join(results)
-# Gradio interface
 def answer_query_from_csv(query, file):
     """
     Function to handle file input and return model results.
@@ -54,15 +54,12 @@ def answer_query_from_csv(query, file):
     # Read the file into a DataFrame
     table_data = pd.read_csv(file)
-    # Convert object-type columns to lowercase (if they are valid strings)
     for column in table_data.columns:
-        if table_data[column].dtype == 'object':
             table_data[column] = table_data[column].apply(lambda x: x.lower() if isinstance(x, str) else x)
-    # Convert all table cells to strings for TAPEX compatibility
-    table_data = table_data.astype(str)
-    # Extract year, month, day, and time components for datetime columns
     for column in table_data.columns:
         if pd.api.types.is_datetime64_any_dtype(table_data[column]):
             table_data[f'{column}_year'] = table_data[column].dt.year
@@ -70,15 +67,16 @@ def answer_query_from_csv(query, file):
             table_data[f'{column}_day'] = table_data[column].dt.day
             table_data[f'{column}_time'] = table_data[column].dt.strftime('%H:%M:%S')
-    # Process the CSV file and query using TAPEX
     result_tapex = process_table_query(query, table_data)
     # Process the query using TAPAS pipelines
     result_tapas = pipe_tapas(table=table_data, query=query)['cells'][0]
     result_tapas2 = pipe_tapas2(table=table_data, query=query)['cells'][0]
     return result_tapex, result_tapas, result_tapas2
 # Create Gradio interface
 with gr.Blocks() as interface:
     gr.Markdown("# Table Question Answering with TAPEX and TAPAS Models")

         # If the results are not numerical, return the joined string
         return ' '.join(results)
 def answer_query_from_csv(query, file):
     """
     Function to handle file input and return model results.
     # Read the file into a DataFrame
     table_data = pd.read_csv(file)
+    # Convert object-type columns (text) to lowercase, leaving numeric columns as is
     for column in table_data.columns:
+        if table_data[column].dtype == 'object':  # Only apply to text columns
             table_data[column] = table_data[column].apply(lambda x: x.lower() if isinstance(x, str) else x)
+    # Handle datetime columns to extract components
     for column in table_data.columns:
         if pd.api.types.is_datetime64_any_dtype(table_data[column]):
             table_data[f'{column}_year'] = table_data[column].dt.year
             table_data[f'{column}_day'] = table_data[column].dt.day
             table_data[f'{column}_time'] = table_data[column].dt.strftime('%H:%M:%S')
+    # Now process the table and query
     result_tapex = process_table_query(query, table_data)
     # Process the query using TAPAS pipelines
     result_tapas = pipe_tapas(table=table_data, query=query)['cells'][0]
     result_tapas2 = pipe_tapas2(table=table_data, query=query)['cells'][0]
     return result_tapex, result_tapas, result_tapas2
 # Create Gradio interface
 with gr.Blocks() as interface:
     gr.Markdown("# Table Question Answering with TAPEX and TAPAS Models")