Spaces:
Running
Running
oceansweep
committed on
Commit
•
facc9cd
1
Parent(s):
2842ee9
Update App_Function_Libraries/Chunk_Lib.py
Browse files
App_Function_Libraries/Chunk_Lib.py
CHANGED
@@ -48,6 +48,18 @@ def load_document(file_path):
|
|
48 |
text = file.read()
|
49 |
return re.sub('\\s+', ' ', text).strip()
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
def improved_chunking_process(text: str, chunk_options: Dict[str, Any]) -> List[Dict[str, Any]]:
|
53 |
chunk_method = chunk_options.get('method', 'words')
|
|
|
48 |
text = file.read()
|
49 |
return re.sub('\\s+', ' ', text).strip()
|
50 |
|
51 |
+
# Load configuration
|
52 |
+
config = load_comprehensive_config()
|
53 |
+
# Embedding Chunking options
|
54 |
+
chunk_options = {
|
55 |
+
'method': config.get('Chunking', 'method', fallback='words'),
|
56 |
+
'max_size': config.getint('Chunking', 'max_size', fallback=400),
|
57 |
+
'overlap': config.getint('Chunking', 'overlap', fallback=200),
|
58 |
+
'adaptive': config.getboolean('Chunking', 'adaptive', fallback=False),
|
59 |
+
'multi_level': config.getboolean('Chunking', 'multi_level', fallback=False),
|
60 |
+
'language': config.get('Chunking', 'language', fallback='english')
|
61 |
+
}
|
62 |
+
|
63 |
|
64 |
def improved_chunking_process(text: str, chunk_options: Dict[str, Any]) -> List[Dict[str, Any]]:
|
65 |
chunk_method = chunk_options.get('method', 'words')
|