File size: 698 Bytes
a2ff264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from .IChunkGenerator import IChunkGenerator
import nltk
from nltk.tokenize import sent_tokenize
class ChunkGenerator(IChunkGenerator):
    """Split raw text into word-bounded chunks along sentence boundaries."""

    def chunk_text(self, text: str, max_words: int = 100) -> list[str]:
        """Group the sentences of *text* into chunks of roughly *max_words* words.

        Sentences (as found by ``nltk.tokenize.sent_tokenize``) are
        accumulated in order; once the running word count reaches
        ``max_words`` the current chunk is flushed.  Because sentences are
        never split, a chunk may exceed ``max_words`` when a single long
        sentence pushes it over the threshold.

        Args:
            text: The input text to chunk.
            max_words: Word-count threshold that triggers starting a new
                chunk (default 100).

        Returns:
            A list of chunk strings (sentences joined by a single space);
            an empty list when *text* contains no sentences.
        """
        sentences = sent_tokenize(text)
        chunks: list[str] = []
        chunk: list[str] = []
        word_count = 0

        for sentence in sentences:
            # Count words via whitespace split; the sentence is always
            # added to the current chunk before the threshold check, so
            # chunks end exactly on sentence boundaries.
            word_count += len(sentence.split())
            chunk.append(sentence)
            if word_count >= max_words:
                chunks.append(" ".join(chunk))
                chunk = []
                word_count = 0

        # Flush any trailing sentences that never reached the threshold.
        if chunk:
            chunks.append(" ".join(chunk))

        return chunks