| | |
| | import csv |
| | import json |
| |
|
| | |
# Print the identifying metadata of one data row of the function dataset.
# Rows are counted from 1 and the header line (consumed by DictReader) is
# not counted.
ROW_NUMBER = 57

print(f"Reading row {ROW_NUMBER} from CSV...")
# The csv module documents that its input file should be opened with
# newline=''; otherwise line breaks embedded in quoted fields may not be
# interpreted correctly.
with open('function_dataset_v2.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row_number, row in enumerate(reader, start=1):
        if row_number == ROW_NUMBER:
            print(f"Row {row_number}:")
            print(f" original_index: {row['original_index']}")
            print(f" repo_name: '{row['repo_name']}'")
            print(f" path: '{row['path']}'")
            print(f" language: '{row['language']}'")
            print(f" function_name: '{row['function_name']}'")
            # Nothing after the target row is needed; stop reading.
            break
    else:
        # The loop ran out of rows without reaching the target; say so
        # instead of silently printing nothing.
        print(f"CSV has fewer than {ROW_NUMBER} rows; nothing to show.")
| |
|
| | |
# Show the metadata fields stored on the first record of the JSONL file.
print("\n\nChecking first JSONL entry...")
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default text encoding.
with open('programming_problems.jsonl', 'r', encoding='utf-8') as f:
    first_line = f.readline()

# Only the first line is parsed; the file is already closed at this point.
data = json.loads(first_line)
metadata = data['metadata']
print(f"original_index: {metadata['original_index']}")
print(f"function_name: {metadata['function_name']}")
print(f"Current repo_name: '{metadata['repo_name']}'")
print(f"Current path: '{metadata['path']}'")
print(f"Current language: '{metadata['language']}'")
| |
|
| | |
# Tally how many CSV data rows have repo_name, path and language all filled.
print("\n\nCounting CSV rows with complete metadata...")
# The csv module documents that its input file should be opened with
# newline=''; otherwise line breaks embedded in quoted fields may not be
# interpreted correctly, which can skew the row count.
with open('function_dataset_v2.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    total = 0
    complete = 0
    for row in reader:
        total += 1
        # A field counts as missing when it is an empty string, or None
        # (the value DictReader fills in for a row with too few columns).
        if row['repo_name'] and row['path'] and row['language']:
            complete += 1

# Report once the whole file has been read and closed.
print(f"Total CSV rows: {total}")
print(f"Rows with complete metadata: {complete}")
print(f"Rows with missing metadata: {total - complete}")
| |
|