Spaces:
Sleeping
Sleeping
| from typing import List | |
class Chunker:
    """Base interface for objects that split text into a list of string chunks.

    The base implementation of :meth:`chunk` does no work and always raises
    ``NotImplementedError``; concrete chunkers override it.
    """

    def chunk(self, text: str) -> List[str]:
        """Split *text* into chunks.

        Args:
            text: The input string to split.

        Returns:
            A list of chunk strings (in overriding implementations).

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
class SlidingWindowChunker(Chunker):
    """Split text into overlapping windows of whitespace-separated words.

    Each chunk holds at most ``chunk_size`` words; consecutive chunks start
    ``chunk_size - overlap`` words apart, so neighbouring chunks share
    ``overlap`` words.

    Args:
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared by consecutive chunks. Must satisfy
            ``0 <= overlap < chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` / ``overlap`` do not describe a window
            that advances through the text without skipping words.
    """

    def __init__(self, chunk_size: int = 512, overlap: int = 50):
        self.chunk_size = chunk_size
        self.overlap = overlap
        # Fail fast: a bad configuration would otherwise surface only when
        # chunk() is called (or, for overlap > chunk_size, never surface and
        # silently yield no chunks at all).
        self._stride()

    def _stride(self) -> int:
        """Return the distance in words between window starts, validating settings."""
        if self.chunk_size <= 0:
            raise ValueError(
                f"chunk_size must be positive, got {self.chunk_size}"
            )
        if not 0 <= self.overlap < self.chunk_size:
            raise ValueError(
                "overlap must satisfy 0 <= overlap < chunk_size, got "
                f"overlap={self.overlap}, chunk_size={self.chunk_size}"
            )
        return self.chunk_size - self.overlap

    def chunk(self, text: str) -> List[str]:
        """Split *text* into overlapping chunks of words.

        Words are obtained with ``str.split()`` (any run of whitespace is a
        separator) and re-joined with single spaces, so the original
        whitespace is not preserved.

        Args:
            text: The input string to split.

        Returns:
            The chunks in order. Empty or whitespace-only input yields ``[]``.

        Raises:
            ValueError: If the instance attributes were changed to an invalid
                combination after construction.
        """
        # Re-validated here because the attributes are public and mutable.
        stride = self._stride()
        words = text.split()
        chunks = []
        for start in range(0, len(words), stride):
            chunks.append(" ".join(words[start : start + self.chunk_size]))
            # Stop once a window reaches the end of the text; a further
            # window would contain only words already emitted.
            if start + self.chunk_size >= len(words):
                break
        return chunks