| | from langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data, retrieve_file_paths |
| | from langflow.custom import Component |
| | from langflow.io import BoolInput, IntInput, MessageTextInput |
| | from langflow.schema import Data |
| | from langflow.template import Output |
| |
|
| |
|
class DirectoryComponent(Component):
    """Load the files found under a directory and expose them as a list of data items.

    The component declares its configurable inputs in ``inputs`` and a single
    output named ``data`` that is produced by :meth:`load_directory`.
    """

    display_name = "Directory"
    description = "Recursively load files from a directory."
    icon = "folder"
    name = "Directory"

    # User-configurable inputs; each one is read back as ``self.<name>`` in
    # ``load_directory``.
    inputs = [
        MessageTextInput(
            name="path",
            display_name="Path",
            info="Path to the directory to load files from.",
        ),
        MessageTextInput(
            name="types",
            display_name="Types",
            info="File types to load. Leave empty to load all default supported types.",
            is_list=True,
        ),
        IntInput(
            name="depth",
            display_name="Depth",
            info="Depth to search for files.",
            value=0,
        ),
        IntInput(
            name="max_concurrency",
            display_name="Max Concurrency",
            advanced=True,
            info="Maximum concurrency for loading files.",
            value=2,
        ),
        BoolInput(
            name="load_hidden",
            display_name="Load Hidden",
            advanced=True,
            info="If true, hidden files will be loaded.",
        ),
        BoolInput(
            name="recursive",
            display_name="Recursive",
            advanced=True,
            info="If true, the search will be recursive.",
        ),
        BoolInput(
            name="silent_errors",
            display_name="Silent Errors",
            advanced=True,
            info="If true, errors will not raise an exception.",
        ),
        BoolInput(
            name="use_multithreading",
            display_name="Use Multithreading",
            advanced=True,
            info="If true, multithreading will be used.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="load_directory"),
    ]

    def load_directory(self) -> list[Data]:
        """Find the matching files under ``self.path`` and load each of them.

        Steps:

        1. Fall back to ``TEXT_FILE_TYPES`` when ``self.types`` is empty or is
           the single-empty-string list ``[""]``.
        2. Resolve the configured path and ask ``retrieve_file_paths`` for the
           candidate files.
        3. Keep only the paths that end with one of the requested types.
        4. Load the files with ``parallel_load_data`` when
           ``self.use_multithreading`` is set, otherwise one at a time with
           ``parse_text_file_to_data``.
        5. Drop falsy results, store the list on ``self.status`` and return it.

        ``self.silent_errors`` and ``self.max_concurrency`` are forwarded to
        the loader helpers unchanged; their exact effect is defined there.

        Returns:
            The loaded items, with falsy entries removed.
        """
        types = self.types if self.types and self.types != [""] else TEXT_FILE_TYPES

        file_paths = retrieve_file_paths(
            self.resolve_path(self.path),
            load_hidden=self.load_hidden,
            recursive=self.recursive,
            depth=self.depth,
            types=types,
        )

        if types:
            # ``str.endswith`` accepts a tuple of suffixes, so the tuple is built
            # once instead of looping over ``types`` for every path.
            # NOTE(review): this is a plain suffix match with no leading dot, so
            # "txt" also matches e.g. "notes.mytxt" -- confirm this is intended.
            suffixes = tuple(types)
            file_paths = [fp for fp in file_paths if fp.endswith(suffixes)]

        if self.use_multithreading:
            loaded_data = parallel_load_data(
                file_paths,
                silent_errors=self.silent_errors,
                max_concurrency=self.max_concurrency,
            )
        else:
            loaded_data = [
                parse_text_file_to_data(file_path, silent_errors=self.silent_errors)
                for file_path in file_paths
            ]

        # Discard falsy entries returned by the loaders before publishing.
        loaded_data = list(filter(None, loaded_data))
        self.status = loaded_data
        return loaded_data
| |
|