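# NOTE: the page capture this file came from began with the banner text
# "Spaces: Sleeping Sleeping"; it is not part of the program.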
import html

import streamlit as st
def chunk_text(text: str, chunk_size: int, overlap: int) -> list:
    """Split *text* into fixed-size character chunks that overlap.

    Each chunk starts ``chunk_size - overlap`` characters after the previous
    one, so consecutive chunks share ``overlap`` characters. Chunks near the
    end of the text may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in order of appearance; an empty list for empty *text*.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.

    >>> chunk_text("abcdefghij", 4, 1)
    ['abcd', 'defg', 'ghij', 'j']
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        # overlap >= chunk_size gives a stride <= 0, which would never reach
        # the end of the text; a negative overlap would silently skip text.
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    stride = chunk_size - overlap
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), stride)
    ]
# Page-level CSS for the header and the per-chunk cards.
_PAGE_CSS = """
<style>
.header {
    color: #2F4F4F;
    border-bottom: 2px solid #2F4F4F;
    padding-bottom: 10px;
}
.chunk-box {
    padding: 20px;
    margin: 10px 0;
    border-radius: 10px;
    background-color: #F0F2F6;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
</style>
"""


def _chunk_card_html(index, chunk):
    """Return the HTML card for one chunk, with the chunk text HTML-escaped."""
    # The chunk is raw user input and the card is rendered with
    # unsafe_allow_html=True, so it must be escaped to avoid markup injection
    # and broken rendering when the text contains '<', '>' or '&'.
    return (
        '<div class="chunk-box">'
        f"<h4>Chunk #{index} (Length: {len(chunk)})</h4>"
        '<hr style="border:1px solid #2F4F4F">'
        f"<p>{html.escape(chunk)}</p>"
        "</div>"
    )


def main():
    """Render the Text Chunker page.

    Shows a text area pre-filled with an example paragraph plus sliders for
    chunk size and overlap. When the button is pressed, the text is split
    with ``chunk_text`` and every chunk is displayed in its own styled card.
    """
    # NOTE(review): the non-ASCII characters in page_icon, the header, the
    # chunk-count line and the success message look like mis-decoded emoji.
    # They are kept byte-for-byte here; restore the intended glyphs from the
    # original file.
    st.set_page_config(page_title="Text Chunker", page_icon="βοΈ", layout="centered")

    # Custom CSS for styling
    st.markdown(_PAGE_CSS, unsafe_allow_html=True)
    st.markdown('<h1 class="header">βοΈ Text Chunker</h1>', unsafe_allow_html=True)

    # Example text (line breaks kept where the original literal had them).
    example_text = (
        "Natural language processing (NLP) is a subfield of linguistics, computer science,\n"
        "and artificial intelligence concerned with the interactions between computers and human language.\n"
        "It focuses on how to program computers to process and analyze large amounts of natural language data.\n"
        "The result is a computer capable of understanding natural language in a way that is both meaningful\n"
        "and useful to humans."
    )

    # Inputs
    input_text = st.text_area("Input Text", value=example_text, height=200)
    col1, col2 = st.columns(2)
    with col1:
        chunk_size = st.slider("Chunk Size (characters)", 50, 200, 100, 10)
    with col2:
        overlap = st.slider("Overlap (characters)", 0, 50, 20, 5)

    # The slider ranges meet at 50, so this combination is reachable; a
    # stride of zero would make chunking impossible.
    if overlap >= chunk_size:
        st.error("Overlap must be smaller than chunk size!")
        return

    # Processing
    if st.button("Chunk It!", type="primary"):
        chunks = chunk_text(input_text, chunk_size, overlap)
        st.markdown(f"**π {len(chunks)} Chunks Created**")
        for i, chunk in enumerate(chunks, 1):
            with st.container():
                st.markdown(_chunk_card_html(i, chunk), unsafe_allow_html=True)
        st.success("β Chunking completed! Scroll to see all chunks.")
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()