|
import io |
|
import pandas as pd |
|
|
|
def extract_table_from_response(gpt_response):
    """Return the lines spanning a pipe-delimited table inside *gpt_response*.

    A line is treated as a table line when it contains at least three ``|``
    characters. The result is the contiguous run of lines from the first
    table line to the last table line, inclusive; any lines that sit between
    them are returned unchanged even if they are not table lines themselves.

    Args:
        gpt_response: Text that may contain a pipe-delimited table.

    Returns:
        A list of line strings, or ``None`` when no table line is found.
    """
    lines = gpt_response.strip().split("\n")

    # Record positions rather than line text. Looking the text up again with
    # list.index() returns the FIRST matching line, which truncates the table
    # whenever its last row duplicates an earlier row.
    table_positions = [
        position
        for position, line in enumerate(lines)
        # Three or more pipes is the same as split('|') giving > 3 pieces.
        if line.count("|") >= 3
    ]

    if not table_positions:
        return None

    first_table_index = table_positions[0]
    last_table_index = table_positions[-1]
    return lines[first_table_index:last_table_index + 1]
|
|
|
def _is_table_separator(line):
    """Return True when *line* is a header/body delimiter row such as ``|---|:-:|``."""
    symbols = set(line) - {" ", "\t", "\r"}
    # Must contain both pipes and dashes; alignment colons are optional.
    return {"|", "-"} <= symbols <= {"|", "-", ":"}


def _split_table_row(line):
    """Split one table row into stripped cell strings, ignoring outer pipes."""
    text = line.strip()
    # Drop at most one pipe from each end so that empty edge cells survive
    # and rows written without outer pipes keep their first and last cells.
    if text.startswith("|"):
        text = text[1:]
    if text.endswith("|"):
        text = text[:-1]
    return [cell.strip() for cell in text.split("|")]


def gpt_response_to_dataframe(gpt_response):
    """Parse the pipe-delimited table found in *gpt_response* into a DataFrame.

    The table lines are obtained from ``extract_table_from_response``. The
    line directly above the delimiter row (e.g. ``|---|---|`` or
    ``| :--- | ---: |``) supplies the column names and every pipe-containing
    line below it becomes one row. All cell values are kept as stripped
    strings; no type conversion is attempted.

    Args:
        gpt_response: Text that may contain a pipe-delimited table.

    Returns:
        A ``pandas.DataFrame`` holding the table. An empty DataFrame is
        returned when no table is found, when there is no delimiter row, or
        when the delimiter row has no header line above it.
    """
    table_lines = extract_table_from_response(gpt_response)
    if not table_lines:
        return pd.DataFrame()

    sep_line_index = next(
        (
            index
            for index, line in enumerate(table_lines)
            if _is_table_separator(line)
        ),
        None,
    )
    # Index 0 means there is no header above the delimiter; without this guard
    # ``sep_line_index - 1`` would wrap around and use the LAST line as header.
    if sep_line_index is None or sep_line_index == 0:
        return pd.DataFrame()

    headers = _split_table_row(table_lines[sep_line_index - 1])

    rows = [
        _split_table_row(line)
        for line in table_lines[sep_line_index + 1:]
        # The extracted span can include blank or prose lines between table
        # lines; those are not rows and must not become empty records.
        if "|" in line
    ]

    return pd.DataFrame(rows, columns=headers)