Goran Glavaš
committed on
Commit
•
80dbedc
1
Parent(s):
cf27868
Integrated resources into project
Browse files- README.txt +20 -0
- binary/graphseg.jar +2 -2
- source/pom.xml +3 -0
- source/res/embeddings.txt +3 -0
- source/res/freqs.txt +0 -0
- source/res/stopwords.txt +330 -0
- source/src/config.properties +0 -3
- source/src/edu/uma/nlp/graphseg/Start.java +7 -4
- source/src/edu/uma/nlp/graphseg/semantics/WordVectorSpace.java +3 -2
- source/src/edu/uma/nlp/graphseg/utils/ApplicationConfiguration.java +0 -49
- source/src/edu/uma/nlp/graphseg/utils/IOHelper.java +7 -0
README.txt
CHANGED
@@ -31,6 +31,14 @@ Example command:
|
|
31 |
|
32 |
java -jar graphseg.jar /home/seg-input /home/seg-output 0.25 3
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
Credit
|
35 |
========
|
36 |
|
@@ -48,6 +56,18 @@ In case you use GraphSeg in your research, please give appropriate credit to our
|
|
48 |
url = {http://anthology.aclweb.org/S16-2016}
|
49 |
}
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
Contact
|
52 |
========
|
53 |
|
|
|
31 |
|
32 |
java -jar graphseg.jar /home/seg-input /home/seg-output 0.25 3
|
33 |
|
34 |
+
The tool's correct execution depends on three resource files in the /source/res directory:
|
35 |
+
|
36 |
+
(1) embeddings.txt -- the word embeddings used for measuring semantic similarity between sentences. The default file contains 200-dimensional GloVe embeddings trained on the Wikipedia 2014 + Gigaword 5 corpus (http://nlp.stanford.edu/data/glove.6B.zip).
|
37 |
+
(2) stopwords.txt -- the list of English stopwords (excluded from sentences when measuring semantic similarity)
|
38 |
+
(3) freqs.txt -- frequencies of English words in a large corpus, needed for the information-content (IC) weighting of each word's contribution
|
39 |
+
|
40 |
+
You may replace these default files (e.g., with different embeddings or a different stopword list), but make sure the new files have exactly the same names (i.e., embeddings.txt, stopwords.txt, and freqs.txt, respectively).
|
41 |
+
|
42 |
Credit
|
43 |
========
|
44 |
|
|
|
56 |
url = {http://anthology.aclweb.org/S16-2016}
|
57 |
}
|
58 |
|
59 |
+
License
|
60 |
+
========
|
61 |
+
|
62 |
+
The GraphSeg tool is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
63 |
+
|
64 |
+
In short, this means:
|
65 |
+
|
66 |
+
(1) you must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use;
|
67 |
+
(2) you may not use the material for commercial purposes;
|
68 |
+
(3) if you remix, transform, or build upon the material, you must distribute your contributions under the same license as the original (i.e., CC BY-NC-SA 4.0).
|
69 |
+
|
70 |
+
|
71 |
Contact
|
72 |
========
|
73 |
|
binary/graphseg.jar
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72d083746b5228e96f05bf9dc3d91a4b21b89318fe9e1eaff9b539ce9adbd825
|
3 |
+
size 616819637
|
source/pom.xml
CHANGED
@@ -14,6 +14,9 @@
|
|
14 |
<exclude>**/*.java</exclude>
|
15 |
</excludes>
|
16 |
</resource>
|
|
|
|
|
|
|
17 |
</resources>
|
18 |
<plugins>
|
19 |
<plugin>
|
|
|
14 |
<exclude>**/*.java</exclude>
|
15 |
</excludes>
|
16 |
</resource>
|
17 |
+
<resource>
|
18 |
+
<directory>res</directory>
|
19 |
+
</resource>
|
20 |
</resources>
|
21 |
<plugins>
|
22 |
<plugin>
|
source/res/embeddings.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18870b0a7516e4a72b44d3c226c242d2d846008967d8ce40b94c723a94d1a32b
|
3 |
+
size 693432828
|
source/res/freqs.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
source/res/stopwords.txt
ADDED
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
's
|
2 |
+
a
|
3 |
+
about
|
4 |
+
above
|
5 |
+
across
|
6 |
+
after
|
7 |
+
afterwards
|
8 |
+
again
|
9 |
+
against
|
10 |
+
ago
|
11 |
+
all
|
12 |
+
almost
|
13 |
+
alone
|
14 |
+
along
|
15 |
+
already
|
16 |
+
also
|
17 |
+
although
|
18 |
+
always
|
19 |
+
am
|
20 |
+
among
|
21 |
+
amongst
|
22 |
+
amount
|
23 |
+
an
|
24 |
+
and
|
25 |
+
another
|
26 |
+
any
|
27 |
+
anyhow
|
28 |
+
anyone
|
29 |
+
anything
|
30 |
+
anyway
|
31 |
+
anywhere
|
32 |
+
are
|
33 |
+
around
|
34 |
+
as
|
35 |
+
at
|
36 |
+
back
|
37 |
+
be
|
38 |
+
became
|
39 |
+
because
|
40 |
+
become
|
41 |
+
becomes
|
42 |
+
becoming
|
43 |
+
been
|
44 |
+
before
|
45 |
+
beforehand
|
46 |
+
behind
|
47 |
+
being
|
48 |
+
below
|
49 |
+
beside
|
50 |
+
besides
|
51 |
+
between
|
52 |
+
beyond
|
53 |
+
bill
|
54 |
+
both
|
55 |
+
bottom
|
56 |
+
but
|
57 |
+
by
|
58 |
+
call
|
59 |
+
can
|
60 |
+
cannot
|
61 |
+
cant
|
62 |
+
co
|
63 |
+
computer
|
64 |
+
con
|
65 |
+
consist
|
66 |
+
could
|
67 |
+
couldnt
|
68 |
+
cry
|
69 |
+
de
|
70 |
+
describe
|
71 |
+
detail
|
72 |
+
do
|
73 |
+
done
|
74 |
+
down
|
75 |
+
due
|
76 |
+
during
|
77 |
+
each
|
78 |
+
eg
|
79 |
+
eight
|
80 |
+
either
|
81 |
+
eleven
|
82 |
+
else
|
83 |
+
elsewhere
|
84 |
+
empty
|
85 |
+
enough
|
86 |
+
etc
|
87 |
+
etc.
|
88 |
+
even
|
89 |
+
ever
|
90 |
+
every
|
91 |
+
everyone
|
92 |
+
everything
|
93 |
+
everywhere
|
94 |
+
except
|
95 |
+
few
|
96 |
+
fifteen
|
97 |
+
fifty
|
98 |
+
fill
|
99 |
+
find
|
100 |
+
fire
|
101 |
+
fig.
|
102 |
+
first
|
103 |
+
five
|
104 |
+
for
|
105 |
+
former
|
106 |
+
formerly
|
107 |
+
forty
|
108 |
+
found
|
109 |
+
four
|
110 |
+
from
|
111 |
+
front
|
112 |
+
full
|
113 |
+
further
|
114 |
+
get
|
115 |
+
give
|
116 |
+
go
|
117 |
+
had
|
118 |
+
has
|
119 |
+
hasnt
|
120 |
+
have
|
121 |
+
he
|
122 |
+
hence
|
123 |
+
her
|
124 |
+
here
|
125 |
+
hereafter
|
126 |
+
hereby
|
127 |
+
herein
|
128 |
+
hereupon
|
129 |
+
hers
|
130 |
+
herself
|
131 |
+
him
|
132 |
+
himself
|
133 |
+
his
|
134 |
+
how
|
135 |
+
however
|
136 |
+
hundred
|
137 |
+
i
|
138 |
+
ie
|
139 |
+
if
|
140 |
+
in
|
141 |
+
inc
|
142 |
+
indeed
|
143 |
+
interest
|
144 |
+
into
|
145 |
+
is
|
146 |
+
it
|
147 |
+
its
|
148 |
+
itself
|
149 |
+
keep
|
150 |
+
last
|
151 |
+
latter
|
152 |
+
latterly
|
153 |
+
least
|
154 |
+
less
|
155 |
+
ltd
|
156 |
+
made
|
157 |
+
many
|
158 |
+
may
|
159 |
+
me
|
160 |
+
meanwhile
|
161 |
+
might
|
162 |
+
mill
|
163 |
+
mine
|
164 |
+
more
|
165 |
+
moreover
|
166 |
+
most
|
167 |
+
mostly
|
168 |
+
move
|
169 |
+
much
|
170 |
+
must
|
171 |
+
my
|
172 |
+
myself
|
173 |
+
name
|
174 |
+
namely
|
175 |
+
neither
|
176 |
+
never
|
177 |
+
nevertheless
|
178 |
+
next
|
179 |
+
nine
|
180 |
+
no
|
181 |
+
nobody
|
182 |
+
none
|
183 |
+
noone
|
184 |
+
nor
|
185 |
+
not
|
186 |
+
n't
|
187 |
+
nothing
|
188 |
+
now
|
189 |
+
nowhere
|
190 |
+
of
|
191 |
+
off
|
192 |
+
often
|
193 |
+
on
|
194 |
+
once
|
195 |
+
one
|
196 |
+
one's
|
197 |
+
only
|
198 |
+
onto
|
199 |
+
or
|
200 |
+
other
|
201 |
+
others
|
202 |
+
otherwise
|
203 |
+
our
|
204 |
+
ours
|
205 |
+
ourselves
|
206 |
+
out
|
207 |
+
over
|
208 |
+
own
|
209 |
+
part
|
210 |
+
per
|
211 |
+
perhaps
|
212 |
+
please
|
213 |
+
put
|
214 |
+
rather
|
215 |
+
re
|
216 |
+
same
|
217 |
+
see
|
218 |
+
seem
|
219 |
+
seemed
|
220 |
+
sb
|
221 |
+
sb.
|
222 |
+
seeming
|
223 |
+
seems
|
224 |
+
serious
|
225 |
+
several
|
226 |
+
she
|
227 |
+
should
|
228 |
+
show
|
229 |
+
side
|
230 |
+
since
|
231 |
+
sincere
|
232 |
+
six
|
233 |
+
sixty
|
234 |
+
so
|
235 |
+
some
|
236 |
+
somehow
|
237 |
+
someone
|
238 |
+
something
|
239 |
+
sometime
|
240 |
+
sometimes
|
241 |
+
somewhere
|
242 |
+
st
|
243 |
+
st.
|
244 |
+
still
|
245 |
+
such
|
246 |
+
system
|
247 |
+
take
|
248 |
+
ten
|
249 |
+
than
|
250 |
+
that
|
251 |
+
the
|
252 |
+
their
|
253 |
+
them
|
254 |
+
themselves
|
255 |
+
then
|
256 |
+
thence
|
257 |
+
there
|
258 |
+
thereafter
|
259 |
+
thereby
|
260 |
+
therefore
|
261 |
+
therein
|
262 |
+
thereupon
|
263 |
+
these
|
264 |
+
they
|
265 |
+
thick
|
266 |
+
thin
|
267 |
+
third
|
268 |
+
this
|
269 |
+
those
|
270 |
+
though
|
271 |
+
three
|
272 |
+
through
|
273 |
+
throughout
|
274 |
+
thru
|
275 |
+
thus
|
276 |
+
to
|
277 |
+
together
|
278 |
+
too
|
279 |
+
top
|
280 |
+
toward
|
281 |
+
towards
|
282 |
+
twelve
|
283 |
+
twenty
|
284 |
+
two
|
285 |
+
un
|
286 |
+
under
|
287 |
+
until
|
288 |
+
up
|
289 |
+
upon
|
290 |
+
us
|
291 |
+
use
|
292 |
+
very
|
293 |
+
via
|
294 |
+
was
|
295 |
+
we
|
296 |
+
well
|
297 |
+
were
|
298 |
+
what
|
299 |
+
whatever
|
300 |
+
when
|
301 |
+
whence
|
302 |
+
whenever
|
303 |
+
where
|
304 |
+
whereafter
|
305 |
+
whereas
|
306 |
+
whereby
|
307 |
+
wherein
|
308 |
+
whereupon
|
309 |
+
wherever
|
310 |
+
whether
|
311 |
+
which
|
312 |
+
while
|
313 |
+
whither
|
314 |
+
who
|
315 |
+
whoever
|
316 |
+
whole
|
317 |
+
whom
|
318 |
+
whose
|
319 |
+
why
|
320 |
+
will
|
321 |
+
with
|
322 |
+
within
|
323 |
+
without
|
324 |
+
would
|
325 |
+
yet
|
326 |
+
you
|
327 |
+
your
|
328 |
+
yours
|
329 |
+
yourself
|
330 |
+
yourselves
|
source/src/config.properties
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
inf-cont-path=C:/Goran/Corpora/unigram-freqs-english.txt
|
2 |
-
word-vec-path=C:/Goran/Corpora/WordVectors/glove-vectors-6b-200d.txt
|
3 |
-
stop-words-path=C:/Goran/Corpora/stopwords.txt
|
|
|
|
|
|
|
|
source/src/edu/uma/nlp/graphseg/Start.java
CHANGED
@@ -19,7 +19,6 @@ import edu.uma.nlp.graphseg.preprocessing.StanfordAnnotator;
|
|
19 |
import edu.uma.nlp.graphseg.semantics.InformationContent;
|
20 |
import edu.uma.nlp.graphseg.semantics.SemanticSimilarity;
|
21 |
import edu.uma.nlp.graphseg.semantics.WordVectorSpace;
|
22 |
-
import edu.uma.nlp.graphseg.utils.ApplicationConfiguration;
|
23 |
import edu.uma.nlp.graphseg.utils.IOHelper;
|
24 |
import edu.uma.nlp.graphseg.utils.MemoryStorage;
|
25 |
|
@@ -74,11 +73,15 @@ public class Start {
|
|
74 |
return;
|
75 |
}
|
76 |
|
77 |
-
|
|
|
|
|
|
|
78 |
MemoryStorage.setWordVectorSpace(new WordVectorSpace());
|
79 |
-
MemoryStorage.getWordVectorSpace().load(
|
80 |
|
81 |
-
|
|
|
82 |
|
83 |
|
84 |
SemanticSimilarity.setStopwords(stopwords);
|
|
|
19 |
import edu.uma.nlp.graphseg.semantics.InformationContent;
|
20 |
import edu.uma.nlp.graphseg.semantics.SemanticSimilarity;
|
21 |
import edu.uma.nlp.graphseg.semantics.WordVectorSpace;
|
|
|
22 |
import edu.uma.nlp.graphseg.utils.IOHelper;
|
23 |
import edu.uma.nlp.graphseg.utils.MemoryStorage;
|
24 |
|
|
|
73 |
return;
|
74 |
}
|
75 |
|
76 |
+
String stopwordsPath = (new File("res/stopwords.txt").getAbsolutePath());
|
77 |
+
List<String> stopwords = IOHelper.getAllLines(stopwordsPath);
|
78 |
+
|
79 |
+
String embeddingsPath = (new File("res/embeddings.txt")).getAbsolutePath();
|
80 |
MemoryStorage.setWordVectorSpace(new WordVectorSpace());
|
81 |
+
MemoryStorage.getWordVectorSpace().load(embeddingsPath, null);
|
82 |
|
83 |
+
String freqsPath = (new File("res/freqs.txt").getAbsolutePath());
|
84 |
+
MemoryStorage.setInformationContent(new InformationContent(freqsPath, 1));
|
85 |
|
86 |
|
87 |
SemanticSimilarity.setStopwords(stopwords);
|
source/src/edu/uma/nlp/graphseg/semantics/WordVectorSpace.java
CHANGED
@@ -3,10 +3,11 @@ package edu.uma.nlp.graphseg.semantics;
|
|
3 |
import java.io.BufferedReader;
|
4 |
import java.io.BufferedWriter;
|
5 |
import java.io.File;
|
|
|
6 |
import java.io.FileNotFoundException;
|
7 |
import java.io.FileOutputStream;
|
8 |
-
import java.io.FileReader;
|
9 |
import java.io.IOException;
|
|
|
10 |
import java.io.OutputStreamWriter;
|
11 |
import java.util.ArrayList;
|
12 |
import java.util.HashMap;
|
@@ -29,7 +30,7 @@ public class WordVectorSpace {
|
|
29 |
{
|
30 |
embeddings = new HashMap<String, double[]>();
|
31 |
|
32 |
-
try (BufferedReader br = new BufferedReader(new
|
33 |
String line;
|
34 |
int counter = 0;
|
35 |
while ((line = br.readLine()) != null) {
|
|
|
3 |
import java.io.BufferedReader;
|
4 |
import java.io.BufferedWriter;
|
5 |
import java.io.File;
|
6 |
+
import java.io.FileInputStream;
|
7 |
import java.io.FileNotFoundException;
|
8 |
import java.io.FileOutputStream;
|
|
|
9 |
import java.io.IOException;
|
10 |
+
import java.io.InputStreamReader;
|
11 |
import java.io.OutputStreamWriter;
|
12 |
import java.util.ArrayList;
|
13 |
import java.util.HashMap;
|
|
|
30 |
{
|
31 |
embeddings = new HashMap<String, double[]>();
|
32 |
|
33 |
+
try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF8"))) {
|
34 |
String line;
|
35 |
int counter = 0;
|
36 |
while ((line = br.readLine()) != null) {
|
source/src/edu/uma/nlp/graphseg/utils/ApplicationConfiguration.java
DELETED
@@ -1,49 +0,0 @@
|
|
1 |
-
package edu.uma.nlp.graphseg.utils;
|
2 |
-
|
3 |
-
import java.io.IOException;
|
4 |
-
import java.io.InputStream;
|
5 |
-
import java.util.Properties;
|
6 |
-
|
7 |
-
public class ApplicationConfiguration {
|
8 |
-
|
9 |
-
public static ApplicationConfiguration config = new ApplicationConfiguration();
|
10 |
-
|
11 |
-
private Properties prop;
|
12 |
-
|
13 |
-
public ApplicationConfiguration()
|
14 |
-
{
|
15 |
-
prop = new Properties();
|
16 |
-
InputStream inStream = getClass().getClassLoader().getResourceAsStream("config.properties");
|
17 |
-
|
18 |
-
if (inStream != null)
|
19 |
-
{
|
20 |
-
try
|
21 |
-
{
|
22 |
-
prop.load(inStream);
|
23 |
-
|
24 |
-
}
|
25 |
-
catch (IOException e) {
|
26 |
-
e.printStackTrace();
|
27 |
-
}
|
28 |
-
finally
|
29 |
-
{
|
30 |
-
try
|
31 |
-
{
|
32 |
-
inStream.close();
|
33 |
-
}
|
34 |
-
catch (IOException e) {
|
35 |
-
e.printStackTrace();
|
36 |
-
}
|
37 |
-
}
|
38 |
-
}
|
39 |
-
}
|
40 |
-
|
41 |
-
public String getValue(String key)
|
42 |
-
{
|
43 |
-
if (prop != null)
|
44 |
-
{
|
45 |
-
return prop.getProperty(key);
|
46 |
-
}
|
47 |
-
else return null;
|
48 |
-
}
|
49 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
source/src/edu/uma/nlp/graphseg/utils/IOHelper.java
CHANGED
@@ -11,6 +11,7 @@ import java.io.InputStream;
|
|
11 |
import java.io.InputStreamReader;
|
12 |
import java.io.OutputStreamWriter;
|
13 |
import java.io.UnsupportedEncodingException;
|
|
|
14 |
import java.nio.file.Files;
|
15 |
import java.nio.file.Paths;
|
16 |
import java.util.ArrayList;
|
@@ -35,6 +36,12 @@ public class IOHelper {
|
|
35 |
}
|
36 |
}
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
public static List<String> getAllLinesWithoutEmpty(String path)
|
39 |
{
|
40 |
try {
|
|
|
11 |
import java.io.InputStreamReader;
|
12 |
import java.io.OutputStreamWriter;
|
13 |
import java.io.UnsupportedEncodingException;
|
14 |
+
import java.nio.charset.StandardCharsets;
|
15 |
import java.nio.file.Files;
|
16 |
import java.nio.file.Paths;
|
17 |
import java.util.ArrayList;
|
|
|
36 |
}
|
37 |
}
|
38 |
|
39 |
+
public static List<String> getAllLinesStream(InputStream stream)
|
40 |
+
{
|
41 |
+
List<String> doc = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)).lines().collect(Collectors.toList());
|
42 |
+
return doc;
|
43 |
+
}
|
44 |
+
|
45 |
public static List<String> getAllLinesWithoutEmpty(String path)
|
46 |
{
|
47 |
try {
|