File size: 6,078 Bytes
1ce325b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#ifndef LM_COMMON_COMPARE_H
#define LM_COMMON_COMPARE_H

#include "ngram.hh"
#include "../word_index.hh"

#include <functional>
#include <string>

namespace lm {

/**
 * Abstract parent class for defining custom n-gram comparators.
 */
template <class Child> class Comparator : public std::binary_function<const void *, const void *, bool> {
  public:

    /**
     * Constructs a comparator capable of comparing two n-grams.
     *
     * @param order Number of words in each n-gram
     */
    explicit Comparator(std::size_t order) : order_(order) {}

    /**
     * Applies the comparator using the Compare method that must be defined in any class that inherits from this class.
     *
     * @param lhs A pointer to the n-gram on the left-hand side of the comparison
     * @param rhs A pointer to the n-gram on the right-hand side of the comparison
     *
     * @see ContextOrder::Compare
     * @see PrefixOrder::Compare
     * @see SuffixOrder::Compare
     */
    inline bool operator()(const void *lhs, const void *rhs) const {
      return static_cast<const Child*>(this)->Compare(static_cast<const WordIndex*>(lhs), static_cast<const WordIndex*>(rhs));
    }

    /** Gets the n-gram order defined for this comparator. */
    std::size_t Order() const { return order_; }

  protected:
    std::size_t order_;
};

/**
 * N-gram comparator that compares n-grams according to their reverse (suffix) order.
 *
 * This comparator compares n-grams lexicographically, one word at a time,
 * beginning with the last word of each n-gram and ending with the first word of each n-gram.
 *
 * Some examples of n-gram comparisons as defined by this comparator:
 * - a b c == a b c
 * - a b c < a b d
 * - a b c > a d b
 * - a b c > a b b
 * - a b c > x a c
 * - a b c < x y z
 */
class SuffixOrder : public Comparator<SuffixOrder> {
  public:

    /**
     * Constructs a comparator capable of comparing two n-grams.
     *
     * @param order Number of words in each n-gram
     */
    explicit SuffixOrder(std::size_t order) : Comparator<SuffixOrder>(order) {}

    /**
     * Compares two n-grams lexicographically, one word at a time,
     * beginning with the last word of each n-gram and ending with the first word of each n-gram.
     *
     * @param lhs A pointer to the n-gram on the left-hand side of the comparison
     * @param rhs A pointer to the n-gram on the right-hand side of the comparison
     */
    inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
      for (std::size_t i = order_ - 1; i != 0; --i) {
        if (lhs[i] != rhs[i])
          return lhs[i] < rhs[i];
      }
      return lhs[0] < rhs[0];
    }

    static const unsigned kMatchOffset = 1;
};


/**
  * N-gram comparator that compares n-grams according to the reverse (suffix) order of the n-gram context.
  *
  * This comparator compares n-grams lexicographically, one word at a time,
  * beginning with the penultimate word of each n-gram and ending with the first word of each n-gram;
  * finally, this comparator compares the last word of each n-gram.
  *
  * Some examples of n-gram comparisons as defined by this comparator:
  * - a b c == a b c
  * - a b c < a b d
  * - a b c < a d b
  * - a b c > a b b
  * - a b c > x a c
  * - a b c < x y z
  */
class ContextOrder : public Comparator<ContextOrder> {
  public:

    /**
     * Constructs a comparator capable of comparing two n-grams.
     *
     * @param order Number of words in each n-gram
     */
    explicit ContextOrder(std::size_t order) : Comparator<ContextOrder>(order) {}

    /**
     * Compares two n-grams lexicographically, one word at a time,
     * beginning with the penultimate word of each n-gram and ending with the first word of each n-gram;
     * finally, this comparator compares the last word of each n-gram.
     *
     * @param lhs A pointer to the n-gram on the left-hand side of the comparison
     * @param rhs A pointer to the n-gram on the right-hand side of the comparison
     */
    inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
      for (int i = order_ - 2; i >= 0; --i) {
        if (lhs[i] != rhs[i])
          return lhs[i] < rhs[i];
      }
      return lhs[order_ - 1] < rhs[order_ - 1];
    }
};

/**
 * N-gram comparator that compares n-grams according to their natural (prefix) order.
 *
 * This comparator compares n-grams lexicographically, one word at a time,
 * beginning with the first word of each n-gram and ending with the last word of each n-gram.
 *
 * Some examples of n-gram comparisons as defined by this comparator:
 * - a b c == a b c
 * - a b c < a b d
 * - a b c < a d b
 * - a b c > a b b
 * - a b c < x a c
 * - a b c < x y z
 */
class PrefixOrder : public Comparator<PrefixOrder> {
  public:

    /**
     * Constructs a comparator capable of comparing two n-grams.
     *
     * @param order Number of words in each n-gram
     */
    explicit PrefixOrder(std::size_t order) : Comparator<PrefixOrder>(order) {}

    /**
     * Compares two n-grams lexicographically, one word at a time,
     * beginning with the first word of each n-gram and ending with the last word of each n-gram.
     *
     * @param lhs A pointer to the n-gram on the left-hand side of the comparison
     * @param rhs A pointer to the n-gram on the right-hand side of the comparison
     */
    inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
      for (std::size_t i = 0; i < order_; ++i) {
        if (lhs[i] != rhs[i])
          return lhs[i] < rhs[i];
      }
      return false;
    }

    static const unsigned kMatchOffset = 0;
};

template <class Range> struct SuffixLexicographicLess : public std::binary_function<const Range, const Range, bool> {
  bool operator()(const Range first, const Range second) const {
    for (const WordIndex *f = first.end() - 1, *s = second.end() - 1; f >= first.begin() && s >= second.begin(); --f, --s) {
      if (*f < *s) return true;
      if (*f > *s) return false;
    }
    return first.size() < second.size();
  }
};

} // namespace lm

#endif // LM_COMMON_COMPARE_H