;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;; ;;; Centre for Speech Technology Research ;; ;;; University of Edinburgh, UK ;; ;;; Copyright (c) 1997 ;; ;;; All Rights Reserved. ;; ;;; ;; ;;; Permission is hereby granted, free of charge, to use and distribute ;; ;;; this software and its documentation without restriction, including ;; ;;; without limitation the rights to use, copy, modify, merge, publish, ;; ;;; distribute, sublicense, and/or sell copies of this work, and to ;; ;;; permit persons to whom this work is furnished to do so, subject to ;; ;;; the following conditions: ;; ;;; 1. The code must retain the above copyright notice, this list of ;; ;;; conditions and the following disclaimer. ;; ;;; 2. Any modifications must be clearly marked as such. ;; ;;; 3. Original authors' names are not deleted. ;; ;;; 4. The authors' names are not used to endorse or promote products ;; ;;; derived from this software without specific prior written ;; ;;; permission. ;; ;;; ;; ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;; ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;; ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;; ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;; ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;; ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;; ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;; ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;; ;;; THIS SOFTWARE. 
;; ;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Author: Alan W Black ;;; Date: December 1997 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; THIS IS EXPERIMENTAL AND DOES *NOT* WORK ;;; ;;; ;;; An English morpho-syntax finite-state grammar ;;; This is used for morphological decomposition of unknown words ;;; specifically (only) words that are not found in the lexicon. ;;; The idea is that when an unknown word is found an attempt is made ;;; to see if it contains any well known morphological inflections or ;;; derivations; if so a better use of LTS can be made on the root, if ;;; none are found this decomposition is not applied. ;;; ;;; ;;; Based on "Analysis of Unknown Words through Morphological ;;; Decomposition", Black, van de Plassche, Williams, European ACL 91. ;;; with the anyword matcher from a question by Lauri Karttunen after ;;; the talk. ;;; ;;; The suffixes and finite-state morph-syntax grammar are based ;;; (very roughly) on the rules in "Computational Morphology" ;;; Ritchie et al. MIT Press 1992. ;;; ;;; Can be compiled with ;;; wfst_build -type rg -o engmorphsyn.wfst -detmin engmorphsyn.scm ;;; ;;; The result can be combined with the morphographemic rules ;;; with ;;; wfst_build -type compose engmorph.wfst engmorphsyn.wfst -detmin -o engstemmer.wfst ;;; ;;; echo "# b o x e/+ s #" | wfst_run -wfst engstemmer.wfst -recog ;;; state 0 #/# -> 1 ;;; state 1 b/b -> 3 ;;; state 3 o/o -> 17 ;;; state 17 x/x -> 14 ;;; state 14 e/+ -> 36 ;;; state 36 s/s -> 34 ;;; state 34 #/# -> 16 ;;; OK. 
;;;  echo "# b o x e s #" | wfst_run -wfst engstemmer.wfst -recog
;;;  state 0 #/# -> 1
;;;  state 1 b/b -> 3
;;;  state 3 o/o -> 17
;;;  state 17 x/x -> 14
;;;  state 14 e/e -> 22
;;;  state 22 s/s -> -1

;;; engsuffixmorphosyntax
;;;
;;; Regular grammar describing a word as: a word boundary (#), a stem made
;;; of one or more "syllables" (any letters with at least one vowel), and
;;; an optional chain of +-marked suffixes, closed by a word boundary (#).
;;; Suffix chains are tracked by category (Verb / Noun / Adj): each
;;; XSuffix is either a final inflection followed by End, or a
;;; category-changing derivation followed by End or by the suffix chain
;;; of the resulting category.
(RegularGrammar
 engsuffixmorphosyntax
 ;; Sets
 (
  ;; Letter classes used by the stem matcher; note that y is a member
  ;; of both the vowel set and the consonant set.
  (V a e i o u y)
  (C b c d f g h j k l m n p q r s t v w x y z)
 )
 ;; Rules
 (
  ;; A word *must* have a suffix to be recognized
  ;; NOTE(review): the second rule below accepts a stem followed directly
  ;; by End, i.e. with no suffix at all, which appears to contradict the
  ;; statement above -- confirm which is intended.
  (Word -> # Syls Suffix )
  (Word -> # Syls End )

  ;; This matches any string of characters that contains at least one vowel
  (Syls -> Syl Syls )
  (Syls -> Syl )
  (Syl -> Cs V Cs )
  ;; Cs is a possibly empty run of consonants
  (Cs -> C Cs )
  (Cs -> )

  ;; A suffix chain starts in one of the three categories
  (Suffix -> VerbSuffix )
  (Suffix -> NounSuffix )
  (Suffix -> AdjSuffix )

  ;; Suffixes attaching to verbs
  (VerbSuffix -> VerbFinal End )
  (VerbSuffix -> VerbtoNoun NounSuffix )
  (VerbSuffix -> VerbtoNoun End )
  (VerbSuffix -> VerbtoAdj AdjSuffix )
  (VerbSuffix -> VerbtoAdj End )

  ;; Suffixes attaching to nouns
  (NounSuffix -> NounFinal End )
  (NounSuffix -> NountoNoun NounSuffix )
  (NounSuffix -> NountoNoun End )
  (NounSuffix -> NountoAdj AdjSuffix )
  (NounSuffix -> NountoAdj End )
  (NounSuffix -> NountoVerb VerbSuffix )
  (NounSuffix -> NountoVerb End )

  ;; Suffixes attaching to adjectives
  (AdjSuffix -> AdjFinal End )
  (AdjSuffix -> AdjtoAdj AdjSuffix)
  (AdjSuffix -> AdjtoAdj End)
  (AdjSuffix -> AdjtoAdv End)  ;; isn't any Adv to anything
  ;; MODIFICATION (not in the original distribution): AdjtoNoun and
  ;; AdjtoVerb are defined at the end of this rule list but were never
  ;; referenced by any rule, so +ness and adjective +ise/+ize could never
  ;; be matched.  The four rules below wire them in, following the same
  ;; pattern as the Noun and Verb derivations above.
  (AdjSuffix -> AdjtoNoun NounSuffix)
  (AdjSuffix -> AdjtoNoun End)
  (AdjSuffix -> AdjtoVerb VerbSuffix)
  (AdjSuffix -> AdjtoVerb End)

  (End -> # )  ;; word boundary symbol *always* present

  ;; Suffix inventories; + marks the morpheme boundary before each suffix
  (VerbFinal -> + e d)
  (VerbFinal -> + i n g)
  (VerbFinal -> + s)

  (VerbtoNoun -> + e r)
  (VerbtoNoun -> + e s s)
  (VerbtoNoun -> + a t i o n)
  (VerbtoNoun -> + i n g)
  (VerbtoNoun -> + m e n t)

  (VerbtoAdj -> + a b l e)

  (NounFinal -> + s)

  (NountoNoun -> + i s m)
  (NountoNoun -> + i s t)
  (NountoNoun -> + s h i p)

  (NountoAdj -> + l i k e)
  (NountoAdj -> + l e s s)
  (NountoAdj -> + i s h)
  (NountoAdj -> + o u s)

  (NountoVerb -> + i f y)
  (NountoVerb -> + i s e)
  (NountoVerb -> + i z e)

  (AdjFinal -> + e r)
  (AdjFinal -> + e s t)

  (AdjtoAdj -> + i s h)
  (AdjtoAdv -> + l y)
  (AdjtoNoun -> + n e s s)
  (AdjtoVerb -> + i s e)
  (AdjtoVerb -> + i z e)
 )
)