Spaces:

shashankkandimalla
/

ocr-text-processing

Paused

App Files Files Community

ocr-text-processing / ocr-application /lib /tcl8.6 /word.tcl

shashankkandimalla

Upload folder using huggingface_hub

288007d verified over 1 year ago

raw

history blame contribute delete

4.9 kB

	# word.tcl --
	#
	# This file defines various procedures for computing word boundaries in
	# strings. This file is primarily needed so Tk text and entry widgets behave
	# properly for different platforms.
	#
	# Copyright (c) 1996 Sun Microsystems, Inc.
	# Copyright (c) 1998 Scriptics Corporation.
	#
	# See the file "license.terms" for information on usage and redistribution
	# of this file, and for a DISCLAIMER OF ALL WARRANTIES.

	# Default regular-expression atoms used by the word-boundary procedures in
	# this file: ::tcl_wordchars matches one character that belongs to a word and
	# ::tcl_nonwordchars matches one character that does not. A variable that
	# already exists when this file is loaded is left untouched.

	switch -exact -- $::tcl_platform(platform) {
	    windows {
	        # Windows style - a word is any run of characters other than
	        # Unicode space characters.
	        if {![info exists ::tcl_wordchars]} {
	            set ::tcl_wordchars {\S}
	        }
	        if {![info exists ::tcl_nonwordchars]} {
	            set ::tcl_nonwordchars {\s}
	        }
	    }
	    default {
	        # Motif style - a word is a run of Unicode word characters
	        # (number, letter, or underscore).
	        if {![info exists ::tcl_wordchars]} {
	            set ::tcl_wordchars {\w}
	        }
	        if {![info exists ::tcl_nonwordchars]} {
	            set ::tcl_nonwordchars {\W}
	        }
	    }
	}

	# Keep the fully assembled matcher REs in the array ::tcl::WordBreakRE so
	# that the word-boundary procedures below only have to look a pattern up.
	# The array is rebuilt whenever ::tcl_wordchars or ::tcl_nonwordchars is
	# written.

	namespace eval ::tcl {
	    variable WordBreakRE
	    array set WordBreakRE {}

	    # UpdateWordBreakREs --
	    #
	    # Rebuilds every entry of WordBreakRE from the current values of the
	    # globals tcl_wordchars and tcl_nonwordchars.
	    #
	    # Arguments:
	    # args - Ignored; present so the procedure can be used directly as a
	    #        variable-trace callback.

	    proc UpdateWordBreakREs args {
	        global tcl_wordchars tcl_nonwordchars
	        variable WordBreakRE

	        # To keep the RE strings short...
	        set letter $tcl_wordchars
	        set space $tcl_nonwordchars

	        # Every variable is written as ${name}: inside a double-quoted
	        # word "$space(" would be parsed as a reference to an element of an
	        # array called "space" instead of the scalar followed by "(".
	        set WordBreakRE(after)    "${letter}${space}|${space}${letter}"
	        set WordBreakRE(before)   "^.*(${letter}${space}|${space}${letter})"
	        set WordBreakRE(end)      "${space}*${letter}+${space}"
	        set WordBreakRE(next)     "${letter}*${space}+${letter}"
	        # Optional non-word run, the captured word, optional non-word run,
	        # anchored at the end of the string being searched.
	        set WordBreakRE(previous) "${space}*(${letter}+)${space}*\$"
	    }

	    # Initialize the cache, then keep it in step with the two globals.
	    UpdateWordBreakREs
	    trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs
	    trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs
	}

	# tcl_wordBreakAfter --
	#
	# Finds the first word boundary at or after a starting index. A boundary is
	# a word character next to a non-word character, in either order. The value
	# returned is the index of the second character of that pair, or -1 when no
	# boundary is found.
	#
	# Arguments:
	# str - String to search.
	# start - Index into string at which the search begins.

	proc tcl_wordBreakAfter {str start} {
	    set pattern $::tcl::WordBreakRE(after)
	    if {[regexp -indices -start $start -- $pattern $str span]} {
	        # span is {first last} of the matched pair.
	        return [lindex $span 1]
	    }
	    return -1
	}

	# tcl_wordBreakBefore --
	#
	# Finds the last word boundary in the part of the string that runs from its
	# beginning up to and including the starting index. The value returned is
	# the index of the second character of the boundary pair, or -1 when that
	# part of the string contains no boundary.
	#
	# Arguments:
	# str - String to search.
	# start - Index into string specifying starting point.

	proc tcl_wordBreakBefore {str start} {
	    set head [string range $str 0 $start]
	    # The pattern begins with a greedy "^.*", so the match ends on the
	    # right-most boundary pair inside head.
	    if {[regexp -indices -- $::tcl::WordBreakRE(before) $head span]} {
	        return [lindex $span 1]
	    }
	    return -1
	}

	# tcl_endOfWord --
	#
	# Finds the first end-of-word location after a starting index: the index of
	# the first non-word character that follows the first run of word
	# characters found from the starting point. Returns -1 when no such
	# location exists, which includes a final word that is not followed by a
	# non-word character.
	#
	# Arguments:
	# str - String to search.
	# start - Index into string specifying starting point.

	proc tcl_endOfWord {str start} {
	    set pattern $::tcl::WordBreakRE(end)
	    if {[regexp -indices -start $start -- $pattern $str span]} {
	        # The match ends on the character that terminates the word.
	        return [lindex $span 1]
	    }
	    return -1
	}

	# tcl_startOfNextWord --
	#
	# Finds the first start-of-word location after a starting index, i.e. the
	# index of a word character that directly follows a non-word character.
	# Returns -1 when there is no further start-of-word location.
	#
	# Arguments:
	# str - String to search.
	# start - Index into string specifying starting point.

	proc tcl_startOfNextWord {str start} {
	    set pattern $::tcl::WordBreakRE(next)
	    if {[regexp -indices -start $start -- $pattern $str span]} {
	        # The match ends on the first character of the next word.
	        return [lindex $span 1]
	    }
	    return -1
	}

	# tcl_startOfPreviousWord --
	#
	# Finds the first start-of-word location before a starting index. Only the
	# characters that precede the starting index are examined. Returns -1 when
	# the starting index is not greater than 0 or when no word is found.
	#
	# Arguments:
	# str - String to search.
	# start - Index into string specifying starting point.

	proc tcl_startOfPreviousWord {str start} {
	    if {$start <= 0} {
	        return -1
	    }
	    # Take everything up to start, then drop the final character, so the
	    # search covers only what lies before start.
	    set head [string range [string range $str 0 $start] 0 end-1]
	    set wordSpan {-1 -1}
	    # "->" receives the whole match; wordSpan receives the captured word.
	    regexp -indices -- $::tcl::WordBreakRE(previous) $head -> wordSpan
	    return [lindex $wordSpan 0]
	}