pe-llm-0 / textsegmentation.py
kolkata97's picture
Update textsegmentation.py
9d49094
def textsegmentation(input_path=None, output_path=None):
    """Split a contract text file into sentences and save them as a CSV.

    The input file is read in full, split with ``nltk.sent_tokenize``, and
    written as one row per sentence under the header
    ``Sentence ID, Sentence Text``. Sentence IDs start at 1.

    Args:
        input_path: Path of the text file to read. When omitted, the
            module-level ``contract_file_path`` is used.
        output_path: Path of the CSV file to write; an existing file is
            overwritten. When omitted, the module-level ``output_csv_file``
            is used.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input file is not valid UTF-8.
    """
    # Fall back to the module-level paths so existing zero-argument callers
    # keep working unchanged.
    if input_path is None:
        input_path = contract_file_path
    if output_path is None:
        output_path = output_csv_file

    # Read with an explicit encoding: the platform default is not UTF-8
    # everywhere, and the CSV below is always written as UTF-8.
    with open(input_path, 'r', encoding='utf-8') as file:
        contract_text = file.read()

    # NOTE(review): sent_tokenize relies on NLTK's tokenizer data being
    # installed -- confirm that setup happens before this is called.
    sentences = nltk.sent_tokenize(contract_text)

    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Sentence ID', 'Sentence Text'])  # Header row
        # Number sentences from 1 and stream the rows straight to the writer.
        writer.writerows(enumerate(sentences, start=1))

    print("Output saved to CSV file.")