Spaces:
Sleeping
Sleeping
File size: 3,427 Bytes
d8d14f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import csv
import json
import os
from typing import Optional

from swarms.utils.pdf_to_text import pdf_to_text
def csv_to_text(file: str) -> str:
    """
    Converts a CSV file to text format.

    The file is parsed with ``csv.reader`` and the resulting list of rows
    (each row being a list of string fields) is rendered with ``str()``.

    Args:
        file (str): The path to the CSV file.

    Returns:
        str: The ``str()`` form of the parsed rows, for example
            ``"[['a', 'b'], ['1', '2']]"``.

    Raises:
        FileNotFoundError: If the file does not exist.
        IOError: If there is an error reading the file.
    """
    # newline="" hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are kept exactly as they appear in the file.
    # The handle gets its own name instead of rebinding the ``file`` argument.
    with open(file, newline="") as csv_file:
        rows = list(csv.reader(csv_file))
    return str(rows)
def json_to_text(file: str) -> str:
    """
    Converts a JSON file to text format.

    The document is parsed and then re-serialized with ``json.dumps`` using
    its default settings, so the result is a compact, single-line JSON string
    with non-ASCII characters escaped.

    Args:
        file (str): The path to the JSON file.

    Returns:
        str: The re-serialized JSON document.

    Raises:
        FileNotFoundError: If the file does not exist.
        IOError: If there is an error reading the file.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read bytes rather than locale-decoded text: the json module detects
    # UTF-8/UTF-16/UTF-32 itself (including a leading UTF-8 BOM), so parsing
    # no longer depends on the platform's default encoding.
    with open(file, "rb") as json_file:
        parsed = json.load(json_file)
    return json.dumps(parsed)
def txt_to_text(file: str) -> str:
    """
    Returns the full content of a plain-text file.

    The file is opened in text mode with the platform's default encoding and
    read in one go.

    Args:
        file (str): The path to the text file.

    Returns:
        str: Everything the file contains.

    Raises:
        FileNotFoundError: If the file does not exist.
        IOError: If there is an error reading the file.
    """
    with open(file) as handle:
        return handle.read()
def md_to_text(file: str) -> str:
    """
    Returns the raw content of a Markdown file.

    No Markdown processing is done; the file is read as plain text with the
    platform's default encoding.

    Args:
        file (str): The path to the Markdown file.

    Returns:
        str: Everything the file contains, markup included.

    Raises:
        FileNotFoundError: If the file does not exist.
        IOError: If there is an error reading the file.
    """
    if os.path.exists(file):
        with open(file) as handle:
            return handle.read()
    # Nothing at that path: report it with an explicit message.
    raise FileNotFoundError(f"No such file or directory: '{file}'")
def data_to_text(file: str) -> Optional[str]:
    """
    Converts the given data file to text format.

    The converter is chosen from the file extension, compared
    case-insensitively: ``.csv``, ``.json``, ``.txt``, ``.pdf`` and ``.md``
    are handed to their dedicated helpers, common image formats are skipped,
    and any other file is read as plain text.

    Args:
        file (str): The path to the data file.

    Returns:
        Optional[str]: The text representation of the data file, or ``None``
            for image files (``.png``, ``.jpg``, ``.jpeg``, ``.gif``,
            ``.bmp``), which are skipped.

    Raises:
        FileNotFoundError: If the file does not exist.
        IOError: If there is an error reading or converting the file. The
            underlying exception is attached as ``__cause__``.

    Examples:
        >>> data_to_text("data.csv")  # doctest: +SKIP
        "[['a', 'b'], ['1', '2']]"
    """
    if not os.path.exists(file):
        raise FileNotFoundError(f"File not found: {file}")

    image_extensions = (".png", ".jpg", ".jpeg", ".gif", ".bmp")
    try:
        # Lowercase so that e.g. ".CSV" and ".csv" are treated alike.
        ext = os.path.splitext(file)[1].lower()
        if ext == ".csv":
            return csv_to_text(file)
        if ext == ".json":
            return json_to_text(file)
        if ext == ".txt":
            return txt_to_text(file)
        if ext == ".pdf":
            return pdf_to_text(file)
        if ext == ".md":
            return md_to_text(file)
        if ext in image_extensions:
            # Binary image formats have no text representation; skip them.
            return None
        # Unknown extension: best effort, read it as plain text. The handle
        # has its own name so ``file`` still holds the path when the error
        # message below is built after a failed read.
        with open(file) as handle:
            return handle.read()
    except Exception as e:
        raise OSError(f"Error reading file: {file}") from e
|