customeragent-api / server /scripts /test_chunker.py
anasraza526's picture
Clean deploy to Hugging Face
ac90985
import sys
import os
# Make the directory one level above this script importable. This must run
# before the `app` import below, which is resolved through sys.path.
# NOTE(review): presumably the parent directory is the one that contains the
# `app` package -- confirm against the repository layout.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
# append() places the directory at the END of the search path, so entries
# already on sys.path are searched first.
sys.path.append(parent_dir)
from app.services.content_processor import SemanticChunker
def test_chunker():
    """Run SemanticChunker on a small HTML page and print each chunk.

    This is a manual smoke check: it makes no assertions and returns
    nothing. For every chunk returned by ``chunk_document`` it prints the
    chunk type, content hash, parent id, metadata and the first 50
    characters of the chunk text.
    """
    # The literal is kept flush-left so the text handed to the chunker is
    # exactly the same as before.
    sample_html = """
<html>
<head>
<title>Test Page</title>
<meta name="description" content="This is a test description.">
</head>
<body>
<h1>Main Header</h1>
<p>Intro paragraph.</p>
<h2>Section 1: Details</h2>
<p>This is the first section content.</p>
<p>More details here.</p>
<h2>Section 2: Conclusion</h2>
<p>Final thoughts.</p>
</body>
</html>
"""
    source_url = "http://example.com"
    results = SemanticChunker().chunk_document(sample_html, source_url)

    total = len(results)
    print(f"Generated {total} chunks:")

    for position, chunk in enumerate(results):
        # One field per line, in a fixed order, followed by a blank line.
        print(f"--- Chunk {position} ({chunk.chunk_type}) ---")
        print(f"Hash: {chunk.content_hash}")
        print(f"Parent: {chunk.parent_id}")
        print(f"Metadata: {chunk.metadata}")
        print(f"Text Preview: {chunk.text[:50]}...")
        print("")
# Run the smoke check only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    test_chunker()