Spaces:
Runtime error
Runtime error
Push
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .env.demo +1 -1
- .gitattributes +0 -7
- data/concept/lilac/legal-termination/concept.json +0 -184
- data/concept/lilac/legal-termination/sbert.pkl +0 -0
- data/concept/lilac/negative-sentiment/concept.json +0 -634
- data/concept/lilac/negative-sentiment/sbert.pkl +0 -0
- data/concept/lilac/positive-sentiment/concept.json +0 -564
- data/concept/lilac/positive-sentiment/sbert.pkl +0 -0
- data/concept/lilac/profanity/concept.json +0 -0
- data/concept/lilac/profanity/openai.pkl +0 -3
- data/concept/lilac/profanity/sbert.pkl +0 -0
- data/concept/lilac/toxicity/concept.json +0 -0
- data/concept/lilac/toxicity/sbert.pkl +0 -0
- data/datasets/local/spotify/data-00000-of-00001.parquet +0 -3
- data/datasets/local/spotify/manifest.json +0 -27
- data/datasets/local/spotify/settings.json +0 -1
- data/datasets/local/spotify/text/.concepts/local/aliens/sbert-neg-100.pkl +0 -0
- data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet +0 -3
- data/datasets/local/spotify/text/lang_detection/signal_manifest.json +0 -36
- data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet +0 -3
- data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet +0 -3
- data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/signal_manifest.json +0 -64
- data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl +0 -3
- data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy +0 -3
- data/datasets/local/spotify/text/sbert/signal_manifest.json +0 -37
- lilac/concepts/concept.py +6 -7
- lilac/config.py +33 -30
- lilac/data/dataset.py +7 -7
- lilac/data/dataset_duckdb.py +107 -164
- lilac/data/dataset_test_utils.py +16 -10
- lilac/data/dataset_utils.py +29 -22
- lilac/embeddings/vector_store.py +85 -4
- lilac/embeddings/vector_store_numpy.py +7 -11
- lilac/router_concept.py +2 -2
- lilac/router_dataset.py +2 -9
- lilac/schema.py +4 -4
- lilac/server.py +1 -1
- lilac/signals/concept_scorer.py +34 -17
- lilac/signals/lang_detection.py +15 -22
- lilac/signals/minhash_dup.py +2 -2
- lilac/signals/near_dup.py +0 -1
- lilac/signals/ner.py +0 -1
- lilac/signals/pii.py +0 -1
- lilac/signals/semantic_similarity.py +29 -23
- lilac/signals/signal.py +34 -70
- lilac/signals/splitters/chunk_splitter.py +26 -7
- lilac/signals/substring_search.py +0 -1
- lilac/signals/text_statistics.py +25 -9
- lilac/web/_app/immutable/assets/0.d7803630.css +0 -0
- lilac/web/_app/immutable/assets/ConceptView.98f1ad48.css +1 -0
.env.demo
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
LILAC_DATA_PATH='/data'
|
2 |
HF_HOME='/data/.huggingface'
|
3 |
-
HF_DATASETS_CACHE='/data/.cache'
|
4 |
TRANSFORMERS_CACHE='/data/.cache'
|
|
|
|
1 |
LILAC_DATA_PATH='/data'
|
2 |
HF_HOME='/data/.huggingface'
|
|
|
3 |
TRANSFORMERS_CACHE='/data/.cache'
|
4 |
+
XDG_CACHE_HOME='/data/.cache'
|
.gitattributes
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
data/datasets/local/spotify/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
2 |
-
data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
3 |
-
data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
4 |
-
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
5 |
-
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl filter=lfs diff=lfs merge=lfs -text
|
6 |
-
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy filter=lfs diff=lfs merge=lfs -text
|
7 |
-
data/concept/lilac/profanity/openai.pkl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/concept/lilac/legal-termination/concept.json
DELETED
@@ -1,184 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"namespace": "lilac",
|
3 |
-
"concept_name": "legal-termination",
|
4 |
-
"type": "text",
|
5 |
-
"data": {
|
6 |
-
"731b1338cf1949958c3526c555f88058": {
|
7 |
-
"label": true,
|
8 |
-
"text": "In the event that any provision of this agreement is found to be unenforceable, the remaining provisions shall continue to be valid and binding.",
|
9 |
-
"id": "731b1338cf1949958c3526c555f88058"
|
10 |
-
},
|
11 |
-
"99a20e547e38474dbc24507a1658d0c9": {
|
12 |
-
"label": true,
|
13 |
-
"text": "The parties agree that in the event of a natural disaster or other unforeseen event, both parties will make reasonable efforts to fulfill their obligations under this contract.",
|
14 |
-
"id": "99a20e547e38474dbc24507a1658d0c9"
|
15 |
-
},
|
16 |
-
"3f27b47c526a4c5896a0a100024535c7": {
|
17 |
-
"label": true,
|
18 |
-
"text": "If any party breaches the terms of this agreement, the non-breaching party shall have the right to seek legal remedies.",
|
19 |
-
"id": "3f27b47c526a4c5896a0a100024535c7"
|
20 |
-
},
|
21 |
-
"d403dbb1ab9c4594bc7f7dcb0ad5b333": {
|
22 |
-
"label": true,
|
23 |
-
"text": "This lease agreement shall survive the termination or expiration of the lease term, and continue to be binding upon the parties.",
|
24 |
-
"id": "d403dbb1ab9c4594bc7f7dcb0ad5b333"
|
25 |
-
},
|
26 |
-
"b7deba9f7e80444abe14448f53f45c43": {
|
27 |
-
"label": true,
|
28 |
-
"text": "In the event of a dispute arising from this contract, the parties agree to first attempt to resolve the dispute through mediation before pursuing any legal action.",
|
29 |
-
"id": "b7deba9f7e80444abe14448f53f45c43"
|
30 |
-
},
|
31 |
-
"a82231b490174e62aad733cb0c75024d": {
|
32 |
-
"label": true,
|
33 |
-
"text": "This Agreement may be terminated, and the transactions contemplated hereby may be abandoned, at any time prior to the Effective Time, whether prior to or after the Company Stockholders' Approval:",
|
34 |
-
"id": "a82231b490174e62aad733cb0c75024d"
|
35 |
-
},
|
36 |
-
"160b25dbf14e4759a0065bbd652ce33f": {
|
37 |
-
"label": true,
|
38 |
-
"text": "This Agreement may be terminated and abandoned at any time prior to the Effective Time of the Merger, whether before or after the Company Stockholder Approval:",
|
39 |
-
"id": "160b25dbf14e4759a0065bbd652ce33f"
|
40 |
-
},
|
41 |
-
"8f5f9f96b16441228bb0c9b8a14c4e25": {
|
42 |
-
"label": false,
|
43 |
-
"text": "any jurisdiction, then such provision shall, as to such jurisdiction, be modified or restricted to the extent necessary to make such provision valid, binding and enforceable, or if such provision cannot be so modified or restricted, then such provision shall, as to such jurisdiction, be deemed to be excised from this Agreement; provided, however, that the legality, binding effect and",
|
44 |
-
"id": "8f5f9f96b16441228bb0c9b8a14c4e25"
|
45 |
-
},
|
46 |
-
"87b6c31b04a346b4a3e0da8d2cc5a7ac": {
|
47 |
-
"label": true,
|
48 |
-
"text": "This Agreement shall terminate automatically without any further action by any party hereto upon the earliest to occur of (a) the Effective Time of the Merger, (b) the termination of the Merger Agreement in accordance with its terms and (c) any amendment or other modification of the Merger Agreement that reduces the amount of the Merger Consideration or provides that the Merger Consideration shall",
|
49 |
-
"id": "87b6c31b04a346b4a3e0da8d2cc5a7ac"
|
50 |
-
},
|
51 |
-
"985344f7ecfb41f4a69ba101973221a1": {
|
52 |
-
"label": false,
|
53 |
-
"text": " During the Employment Period, the Corporation shall pay ----------- the Executive a base salary which, as of the commencement of the Employment Period, shall be at an annual rate of Two Hundred Fifty Thousand Dollars ($250,000). The base salary shall be payable in equal periodic installments which are not less frequent than the periodic installments in effect for salaries of other senior",
|
54 |
-
"id": "985344f7ecfb41f4a69ba101973221a1"
|
55 |
-
},
|
56 |
-
"5d53ff48376046fdab41e95c7f4bad54": {
|
57 |
-
"label": true,
|
58 |
-
"text": "This Agreement may be terminated at any time prior to the Closing Date solely:",
|
59 |
-
"id": "5d53ff48376046fdab41e95c7f4bad54"
|
60 |
-
},
|
61 |
-
"bdeb785be2154b21b4eb052466fa9bcb": {
|
62 |
-
"label": true,
|
63 |
-
"text": "(a) This Agreement may be terminated by you by notice to the Company at any time prior to the Closing Date if any of the following has occurred: (i) since the respective dates as of which information is given in the Registration Statement and the Prospectus, any material adverse change or any development involving a prospective material adverse change in or affecting the earnings, busi ness,",
|
64 |
-
"id": "bdeb785be2154b21b4eb052466fa9bcb"
|
65 |
-
},
|
66 |
-
"fe6871e9070441f8a9e4b3db26b077d7": {
|
67 |
-
"label": true,
|
68 |
-
"text": "Section 3(b), this Section 7 and Section 8 of this Agreement shall survive a termination of this Agreement pursuant to (a) or (b) above in this Section 7 until the date that is two years following the date of such termination. Notwithstanding anything else to the contrary contained herein or in the Merger Agreement, if the Effective Time occurs, the representations and warranties contained in",
|
69 |
-
"id": "fe6871e9070441f8a9e4b3db26b077d7"
|
70 |
-
},
|
71 |
-
"bf1a51751d0748e58c344aec8e5fc789": {
|
72 |
-
"label": false,
|
73 |
-
"text": "This Agreement may be executed in one or more counterparts (including counterparts executed and delivered by facsimile, which shall be as counterparts executed and delivered manually), all of which shall be considered one and the same agreement and shall become effective when one or more counterparts have been signed by each of the parties and delivered to the other party, it being understood that",
|
74 |
-
"id": "bf1a51751d0748e58c344aec8e5fc789"
|
75 |
-
},
|
76 |
-
"bc1b2affa6d848fd92d4dee033e30659": {
|
77 |
-
"label": false,
|
78 |
-
"text": "would, in your judgment, make it impracticable or inadvisable to market the Units or to enforce contracts for the sale of the Units, (iii) suspension of trading in securities generally on the New York Stock Exchange, the American Stock Exchange or the Nasdaq National Market or limitation on prices (other than limitations on hours or numbers of days of trading) for securities on any such Exchange,",
|
79 |
-
"id": "bc1b2affa6d848fd92d4dee033e30659"
|
80 |
-
},
|
81 |
-
"67a73d5887f74a91bed190ca8f64b17c": {
|
82 |
-
"label": false,
|
83 |
-
"text": " The authorized capital stock of FM consists of 1,000 shares of Common Stock, no par value each, of which 1,000 shares are issued and outstanding. There are no outstanding or authorized options, warrants, calls, subscriptions, rights (including any preemptive rights or rights of first refusal), agreements or commitments of any character obligating FM to issue any stock or any other Equity",
|
84 |
-
"id": "67a73d5887f74a91bed190ca8f64b17c"
|
85 |
-
},
|
86 |
-
"025b2ca5147849c8a921d9aaa31cd9cd": {
|
87 |
-
"label": false,
|
88 |
-
"text": "Taxes that are being contested in good faith by appropriate proceedings, provided that Holdings, the Borrower or Restricted Subsidiary, as the case may be, has set aside on its books adequate reserves therefor in accordance with GAAP.",
|
89 |
-
"id": "025b2ca5147849c8a921d9aaa31cd9cd"
|
90 |
-
},
|
91 |
-
"76acff27f13743f4822a094c707d8b75": {
|
92 |
-
"label": false,
|
93 |
-
"text": "have been a suspension or material limitation in trading in the Company\u2019s common stock on the New York Stock Exchange; (iii) there shall have been a general moratorium on commercial banking activities declared by either federal or New York state authorities or a material disruption in commercial banking or securities settlement or clearance services in the United States; (iv) there shall have been",
|
94 |
-
"id": "76acff27f13743f4822a094c707d8b75"
|
95 |
-
},
|
96 |
-
"b11a95c0eb564445b1a473e90622f861": {
|
97 |
-
"label": true,
|
98 |
-
"text": "10.1. This Agreement will terminate:",
|
99 |
-
"id": "b11a95c0eb564445b1a473e90622f861"
|
100 |
-
},
|
101 |
-
"d536428a02084d94ba18d412851cb913": {
|
102 |
-
"label": false,
|
103 |
-
"text": "may not be limited to his Base Salary and that the Employee may receive an annual bonus in the amount, if any, determined annually by the Employer. The Employee shall also participate in employee compensation and benefit plans available generally to executives of the Employer (including, without limitation, any tax-qualified profit sharing plan, nonqualified profit sharing plan, life insurance",
|
104 |
-
"id": "d536428a02084d94ba18d412851cb913"
|
105 |
-
},
|
106 |
-
"368bb1d9c7d0419d9ca58f28565eeb2e": {
|
107 |
-
"label": true,
|
108 |
-
"text": "This Agreement may be terminated in the absolute discretion of the Representatives, by notice to the Bank, if after execution and delivery of this Agreement and prior to the Closing Date (i) there has been, since the date of this Agreement or since the respective dates as of which information is given in the Registration Statement, the Time of Sale Information or the Prospectus, any material",
|
109 |
-
"id": "368bb1d9c7d0419d9ca58f28565eeb2e"
|
110 |
-
},
|
111 |
-
"1b5fd7b037a84404bf85c858953c79e8": {
|
112 |
-
"label": true,
|
113 |
-
"text": "however, (i) the right to terminate this Agreement under this Section 8 shall not be available to such Buyer if the failure of the transactions contemplated by this Agreement to have been consummated by such date is the result of such Buyer\u2019s breach of this Agreement and (ii) the abandonment of the sale and purchase of the Notes and the Warrants shall be applicable only to such Buyer providing",
|
114 |
-
"id": "1b5fd7b037a84404bf85c858953c79e8"
|
115 |
-
},
|
116 |
-
"6d5a23d2663f457cab96df03d9dc8ab7": {
|
117 |
-
"label": true,
|
118 |
-
"text": "In addition, any Stockholder may terminate this Agreement if Weatherford, WEUS, or the Company breaches any representation, warranty, covenant or other agreement contained in the Merger Agreement that (A) would give rise to the failure of Weatherford, WEUS, or the Company to satisfy any condition set forth in Section 8.2(a) thereof, and (B) cannot be or has not been cured within 45 days after the",
|
119 |
-
"id": "6d5a23d2663f457cab96df03d9dc8ab7"
|
120 |
-
},
|
121 |
-
"4a8223a48f83491b9b3eafd7ad37baf9": {
|
122 |
-
"label": true,
|
123 |
-
"text": "The obligations of the Underwriters hereunder may be terminated by the Representatives, in their absolute discretion, by notice given to and received by the Depositor or the Bank prior to delivery of and payment for the Notes if, prior to that time, any of the events described in Section 5(v) shall have occurred or any of the other conditions described in Section 5 shall not be satisfied.",
|
124 |
-
"id": "4a8223a48f83491b9b3eafd7ad37baf9"
|
125 |
-
},
|
126 |
-
"fbb152eae00c440bb2d0df0fbd82c262": {
|
127 |
-
"label": true,
|
128 |
-
"text": "Either of the parties hereto may terminate this Agreement by giving to the other party a notice in writing specifying the date of such termination, which shall be not less than 60 days after the date of receipt of such notice. In the event such notice is given by the Customer, it shall be accompanied by a copy of a resolution of the Board of Directors of the Customer, certified by its Secretary,",
|
129 |
-
"id": "fbb152eae00c440bb2d0df0fbd82c262"
|
130 |
-
},
|
131 |
-
"1d21880f426c45ada31409d22815cc87": {
|
132 |
-
"label": false,
|
133 |
-
"text": "Prospectus or the Final Prospectus (exclusive of any amendment or supplement thereof or thereto after the date hereof).",
|
134 |
-
"id": "1d21880f426c45ada31409d22815cc87"
|
135 |
-
},
|
136 |
-
"795cac72a3504740bc7401a84fc6fba4": {
|
137 |
-
"label": true,
|
138 |
-
"text": "This Agreement may be terminated by the Customer or the Bank by giving ninety (90) days written notice to the other, provided that such notice to the Bank shall specify the names of the persons to whom the Bank shall deliver the Assets in the Accounts. If notice of termination is given by the Bank, the Customer shall, within ninety (90) days following receipt of the notice, deliver to the Bank Instructions specifying the names of the persons to whom the Bank shall deliver the Assets.",
|
139 |
-
"id": "795cac72a3504740bc7401a84fc6fba4"
|
140 |
-
},
|
141 |
-
"3b82e6eba4894ac0b9f7f12aba2aab2e": {
|
142 |
-
"label": false,
|
143 |
-
"text": "of this Agreement, or to Authorized Persons, or may continue to hold the Assets until Instructions are provided to the Bank.",
|
144 |
-
"id": "3b82e6eba4894ac0b9f7f12aba2aab2e"
|
145 |
-
},
|
146 |
-
"da16bd0e9dce4d4c87400eab61b9b14c": {
|
147 |
-
"label": false,
|
148 |
-
"text": "into force of the Convention. In such event, the Convention shall cease to have effect:",
|
149 |
-
"id": "da16bd0e9dce4d4c87400eab61b9b14c"
|
150 |
-
},
|
151 |
-
"02cc328109984db094b0b02caec0d575": {
|
152 |
-
"label": true,
|
153 |
-
"text": "Survival. The rights and obligations contained in Sections 3 (\u201cOwnership of Work Product\u201d), 4 (\u201cOther Rights\u201d), 5 (\u201cLicense to Preexisting IP\u201d), 6 (\u201cRepresentations and Warranties\u201d), 8 (\u201cConfidential Information\u201d) and 12 (\u201cNon-solicitation\u201d) will survive any termination or expiration of this Agreement. ",
|
154 |
-
"id": "02cc328109984db094b0b02caec0d575"
|
155 |
-
},
|
156 |
-
"f8edf65d9acf4ff4a04459a3492ac426": {
|
157 |
-
"label": false,
|
158 |
-
"text": "Severability. Should any provisions of this Agreement be held by a court of law to be illegal, invalid or unenforceable, the legality, validity and enforceability of the remaining provisions of this Agreement will not be affected or impaired thereby. ",
|
159 |
-
"id": "f8edf65d9acf4ff4a04459a3492ac426"
|
160 |
-
},
|
161 |
-
"5a8517f359494ead8c11b6aff440480d": {
|
162 |
-
"label": false,
|
163 |
-
"text": "\u0095\tCommitted to deliver the best, we leave no room for customer grievances.\r\n\r\n",
|
164 |
-
"id": "5a8517f359494ead8c11b6aff440480d"
|
165 |
-
},
|
166 |
-
"a47d327d0f6e46fc861f86b2e0e54a2f": {
|
167 |
-
"label": false,
|
168 |
-
"text": "the due diligence and using our agreement creator to close the deal successfully. \r",
|
169 |
-
"id": "a47d327d0f6e46fc861f86b2e0e54a2f"
|
170 |
-
},
|
171 |
-
"811d0dcc92e14c5c881e903c7d4ff7b6": {
|
172 |
-
"label": false,
|
173 |
-
"text": "in accordance with customary procedures in the relevant markets, but in any event for a settlement period no longer than three months following the date of such commitment.",
|
174 |
-
"id": "811d0dcc92e14c5c881e903c7d4ff7b6"
|
175 |
-
},
|
176 |
-
"907f92e0d5704418944a559a4bfb96c7": {
|
177 |
-
"label": false,
|
178 |
-
"text": "terminate in accordance with Section 2 of the Investors\u2019 Rights Agreement.",
|
179 |
-
"id": "907f92e0d5704418944a559a4bfb96c7"
|
180 |
-
}
|
181 |
-
},
|
182 |
-
"version": 33,
|
183 |
-
"description": "Termination or survival clause in a legal document"
|
184 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/concept/lilac/legal-termination/sbert.pkl
DELETED
Binary file (33.8 kB)
|
|
data/concept/lilac/negative-sentiment/concept.json
DELETED
@@ -1,634 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"namespace": "lilac",
|
3 |
-
"concept_name": "negative-sentiment",
|
4 |
-
"type": "text",
|
5 |
-
"data": {
|
6 |
-
"0": {
|
7 |
-
"label": true,
|
8 |
-
"text": "Starting To Be Annoyed By Becky...: I'm not sure why I keep reading these books, but I guess it's because I've read the first two so I'll keep reading the rest of the books. In the first book, I really found it amusing. I was a little annoyed by the fact that Becky couldn't stop spending, but then again that's why she is called a Shopaholic. In the second book, I felt more of the same it was just magniifed more. Now in the third book, I'm just down right annoyed by Becky Bloomwood. In this book, she wasn't going on crazy shopping sprees, just planning two different weddings because she was afraid to tell each person and because I feel she's really selfish. Still, I read the book because I wanted to see how she could get herself out of another situation. I will say that I love her friends Suze and Danny, her client Laurel and her husband Luke. Maybe that's why I keep reading. I will read the next book, but I'm sure I'll be just as annoyed when I'm done.",
|
9 |
-
"id": "0"
|
10 |
-
},
|
11 |
-
"1": {
|
12 |
-
"label": true,
|
13 |
-
"text": "the cover is fine - the pool is horrible: The entire pool was horrible. The cover was fine once we got it on, but we finally got rid of the pool after 2 weeks because it was so hard to set up and keep clean.",
|
14 |
-
"id": "1"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"label": false,
|
18 |
-
"text": "Good album, not their best.: This album is probably the most marketable and radio friendly of all of dashboard's albums. For the peripheral listener it may be the right one to get to introduce you to this band. But as a Dashboard fan of 5 or so years I truly hope they return to their original sound for their next work. Not for the listen-ability but for the show. To this day the fans react best to the songs from \"Places\" or \"A Mark, A Mission.\" I recommend this album to everyone but I also recommend any and all of their other work.",
|
19 |
-
"id": "2"
|
20 |
-
},
|
21 |
-
"3": {
|
22 |
-
"label": true,
|
23 |
-
"text": "This is a horror novel, right?: Never one to pass up any vampire novel, I purchased Sips because the description seemed interesting. Vampires, Marquis de Sade, fetishism, yada yada yada. If this is a comedy, I give it 4 stars; however, I'll give it 1 star as a horror novel. Sade was rather boring; I would think a character as intense and multi-faceted as the Marquis de Sade would make for a more interesting vampire. The writing style isn't too bad, but overall I found the characters to be mildly amusing at best. The plot was thin, the end was anti-climactic, and the vampires were not very frightening. The book had little suspense, and it leaves a mile-wide opening for a sequel at the conclusion. I would, however, like to see something more of the vampire mutants lurking in the graveyard. They were the most riveting of any of the characters.",
|
24 |
-
"id": "3"
|
25 |
-
},
|
26 |
-
"4": {
|
27 |
-
"label": false,
|
28 |
-
"text": "Superb mix of global non secular musical denominations: I first heard Ms. Pook's music on the \"Eyes Wide Shut\" soundtrack (the masquerade ball scene) and was blown away; if ever there was a necessity for music to permeate a scene in a film this was it. She incorporates a blend of the traditional songs from faiths across continents and mixes them, for lack of a better comparison than similar to your quintessential raver d.j. (though these are better and definitively more original :) \"Oppenheimer\" is my favorite, and if you let the last track run for a few minutes a portion of the song will play once more. I can't wait to hear more of her stuff - these hymns are awesome.",
|
29 |
-
"id": "4"
|
30 |
-
},
|
31 |
-
"5": {
|
32 |
-
"label": false,
|
33 |
-
"text": "A moving and suspenseful story!: For anyone familiar with the occult, this book is likely to raise hairs on the back of your neck as you read. Even if you're not, the storyline is suspenseful and fascinating, and the characters evoke great sympathy and admiration. An excellent read.",
|
34 |
-
"id": "5"
|
35 |
-
},
|
36 |
-
"6": {
|
37 |
-
"label": false,
|
38 |
-
"text": "Simple & Easy to Use - A Practical approach to eating out: This guide is extremely to use. It provides sample menus that you'd see at Chinese, Indian and Thai restaurants. Then you are provided with descriptions of each dish and how it is prepared and the ingredients used. From there you are provided with specific considerations as to how the preparation or ingredient list may affect you if you have Gluten or Allergen issues.This book is the size of a passport and very organized and well written. The Chinese, Indian and Thai Cuisine Passport is perfect for making choices while traveling, or while dining at your favorite local restaurant.",
|
39 |
-
"id": "6"
|
40 |
-
},
|
41 |
-
"7": {
|
42 |
-
"label": true,
|
43 |
-
"text": "Being Fair....I am a fan of hers: and I really enjoyed her previous works, more than I could have ever imagined, but this record is horrible. The songs are trite, the lyrics are incredibly boring, indulgent and immature. The music is pop staple, with forgetable melodies and repetative chorus lines, I feel as if the studio wrote the entire album for her while she was sleeping, this just doesn't speak to any of her previous works at all. This album fits on the same shelf with a Nickelodeon-themed CD. Instead of heading in the direction of an artist like Alanis Morrisette, she is going backward and joining the ranks of Hannah Montana and the Naked Brothers Band. She is a great artist and her first two records are amazing. She is better than this CD and I am looking forward to her next effort.",
|
44 |
-
"id": "7"
|
45 |
-
},
|
46 |
-
"8": {
|
47 |
-
"label": true,
|
48 |
-
"text": "Sucked: I thought the DVD sucked tremendously. It was very boring and if I could, I would return it for a refund. There was only one \"small\" clip of Dylan himself. I'm very disappointed.",
|
49 |
-
"id": "8"
|
50 |
-
},
|
51 |
-
"9": {
|
52 |
-
"label": false,
|
53 |
-
"text": "Excellent product: Easy to install. If you have a newer furnace you probably do not need the swail switch as the HE220A comes with a Humistat which can be connected to the furnace. They recommend the Honeywell 32005847-001 Installation Kit, Bypass which is a little pricey and you can probably buy the pieces of this kit cheaper individually from Home Depot or Lowes or ACO as well as the filters.",
|
54 |
-
"id": "9"
|
55 |
-
},
|
56 |
-
"10": {
|
57 |
-
"label": false,
|
58 |
-
"text": "Very happy.: I am very happy with this trashcan. I was unable to find one in the stores to fit the space in my cabinet, but this one does the job. It is very sturdy and looks like it will put up with years of use.",
|
59 |
-
"id": "10"
|
60 |
-
},
|
61 |
-
"11": {
|
62 |
-
"label": true,
|
63 |
-
"text": "These aren't Throughbreds!: This makes me so mad. All these new authors are coming and changing the series. Nothings the same anymore and the plots are repeditive. Don't even bother reading these books until #32 these are like a different series. I don't know excactly what's happing but these new authors suck!",
|
64 |
-
"id": "11"
|
65 |
-
},
|
66 |
-
"12": {
|
67 |
-
"label": true,
|
68 |
-
"text": "Large and slow are a bad combination.: I bought this TV and returned it a week later, because it blurred so badly with motion that sports were unwatchable. I ended up buying a smaller Sony XBR4, and I have none of the issues (plus the picture is far, far better).This has nothing to do with 60 vs 120Hz. That is more important for DVDs and Blu-Ray signals that are 24fps (which doesn't divide evenly into 60 but does for 120). The LT52133 has an 8ms response time, which is extremely slow. A decent LCD should be 5 or lower.If you want an LCD, choose speed and quality over size. If you want size and quality but want to spend less, buy a plasma. Don't buy a big, cheap, slow LCD!I gave it 2 stars because I like the interface and remote.",
|
69 |
-
"id": "12"
|
70 |
-
},
|
71 |
-
"13": {
|
72 |
-
"label": true,
|
73 |
-
"text": "Skip it: This movie is very poorly written and the result is not distressing, just lame. The actors do their best but from very early on it is obvious that the material gives them nothing to work with. Fans of Colin Firth will experience a certain dim level of enjoyment. Minnie Driver is a treat but her character is no better written than the others. Vermont locations are worth something. With one or two moments of exception it's neither comedic nor romantic.",
|
74 |
-
"id": "13"
|
75 |
-
},
|
76 |
-
"14": {
|
77 |
-
"label": false,
|
78 |
-
"text": "Belive it i went to the concert?: hi everyone let me tell you i went to the concert i was amazed with what i saw cher was awsome i tell you buy the dvd. as i sat in front of the stage cher was doing a great job to us the she is living proof . So i urge you to buy it?",
|
79 |
-
"id": "14"
|
80 |
-
},
|
81 |
-
"15": {
|
82 |
-
"label": false,
|
83 |
-
"text": "Vale la pena.: En este libro se narra de una forma muy interesante la vida de una familia en particular. Lo que mas me gusto de este libro fue la manera en que la autora describe a lo largo del libro las personalidades de los sujetos envueltos en la novela; que vienen a ser muy distintos y extremos, lo cual, intensifica el drama... Definitivamente es un buen libro y lo recomiendo a todos.",
|
84 |
-
"id": "15"
|
85 |
-
},
|
86 |
-
"16": {
|
87 |
-
"label": false,
|
88 |
-
"text": "Nummie Children's story: I ordered this book for our grandchildren. Two boys 5 & 3 and a 4 month old girl. All love the story. The mouse is determined.",
|
89 |
-
"id": "16"
|
90 |
-
},
|
91 |
-
"17": {
|
92 |
-
"label": true,
|
93 |
-
"text": "Seem to be alone on this one: Looking at the other reviews, I seem to be the only one that was disappointed with this book. The content is too babyish in most of it for older tweens and the more \"grown up\" content would be over a younger tween's head. I had a quick read through and with every paged turned, I thought duh. I'll be looking around for another book shortly.",
|
94 |
-
"id": "17"
|
95 |
-
},
|
96 |
-
"18": {
|
97 |
-
"label": false,
|
98 |
-
"text": "Best yet: by far the best EA game yet. I especially like the easy controls and kick - a graphics. the playbook is extremely accurate and detailed. Also the fight songs and cheerleaders were a nice touch. this is an excellent game and worth checking out.",
|
99 |
-
"id": "18"
|
100 |
-
},
|
101 |
-
"19": {
|
102 |
-
"label": true,
|
103 |
-
"text": "washed out: A bit like Simply Reds version of the Valentine bros hit \"Moneys too tight to mention\" - this cover version has nothing of the driving energy that characterised the original recording.",
|
104 |
-
"id": "19"
|
105 |
-
},
|
106 |
-
"20": {
|
107 |
-
"label": false,
|
108 |
-
"text": "great water bottle: I love this bottle it is great. I like knowing it is non toxic and it just works very well. You can have it full and lay it down and it doesn't leak at all.",
|
109 |
-
"id": "20"
|
110 |
-
},
|
111 |
-
"21": {
|
112 |
-
"label": false,
|
113 |
-
"text": "Nice goggles: I am pretty happy with these goggles. They work well during swim workouts in the pool. I do notice a little bit of fogging from time to time. I had hoped to wear them during an upcoming triathlon, but based on a few instances where they slipped a little in the pool I am concerned that they won't be secure enough. I will keep using them in the pool, but will likely get different ones for open water races.",
|
114 |
-
"id": "21"
|
115 |
-
},
|
116 |
-
"22": {
|
117 |
-
"label": true,
|
118 |
-
"text": "aaahhh nnnoooooo!: Hopefully the last film in one of the worst horror trilogys ever made. This series pretty much ruined the horror film for years to come, for one its too self aware, thats incredibley annoying, second everyone acts like they are on Friends or some sitcom. The acting is just plain bad and unconvincing. Now the gore, if you're going with material this weak you should load it up with disgusting violence, is there any in the Scream series? No.Everyone went to see this movie just to see who THE KILLER is. This movie sets low standards to be met, you expect alot of people to die, one shock, then we find out who the killer is, then you go home. Every horror film being made today is like that, there's nothing new or exciting or risk taking, its the same stuff over and over and people are laping it up like dog food.This film is what you've come to expect, bad acting, some people die and we eventually find out who the killer is and all is merry and well. Pathetic.",
|
119 |
-
"id": "22"
|
120 |
-
},
|
121 |
-
"23": {
|
122 |
-
"label": false,
|
123 |
-
"text": "A classic of its kind: This movie is a classic of its kind and much better that a lot of movies, that followed. It is not one of the best, but it still deserves five stars...",
|
124 |
-
"id": "23"
|
125 |
-
},
|
126 |
-
"24": {
|
127 |
-
"label": true,
|
128 |
-
"text": "Nice suite, but Virtual PC 7 disappoints on my G5: I purchased the upgrade since I'd already bought both Office v.X and Virtual PC 6.1 last year.The biggest letdown is that Microsoft's promised support for the G5 is nearly non-existent. I have a dual processor G5 with an ATI Radeon 9800 card (Apple), and after trying to install Virtual PC 7 three times, I cannot get a VM to work. It did install (and work) flawlessly on my G4 Powerbook. Googling for reviews finds it's very hit or miss, but if (when) it misses, you'll regret investing the extra $$$ in an immature product.",
|
129 |
-
"id": "24"
|
130 |
-
},
|
131 |
-
"25": {
|
132 |
-
"label": true,
|
133 |
-
"text": "Okay player, don't expect a miracle: I bought this DVD player at Circuit City earlier this yr for about a $100. I hooked it up to a 47\" Vizio LCD (which by the way has an awesome picture) using a HDMI cable. After fine tuning this product, I was very, very, very diasppointed. The picture was very \"grainy\" (lots of pixels). I have a $35 DVD player that only utilizes an s-video cable that produces a much more crisp picture. Be warned, the picture stinks.",
|
134 |
-
"id": "25"
|
135 |
-
},
|
136 |
-
"26": {
|
137 |
-
"label": false,
|
138 |
-
"text": "A revelation of the science of consciousness evolution and all natural growth: Here is a readable and fascinating account of the development of the new science of chaos theory, the only body of ideas that describes how the natural world as experienced by human beings emerges out of basic quantum processes. The different explorers and innovators of the new science are introduced in a personable way that will enchant the interested reader.",
|
139 |
-
"id": "26"
|
140 |
-
},
|
141 |
-
"27": {
|
142 |
-
"label": true,
|
143 |
-
"text": "Don't say that I didn't warn ya' !: I'm absolutely convinced that Delbert McClinton had no controlover the release of this CD. I rated it 1 star simplybecause there is no 0 star rating ! In actuality , I am not certain that the vocalist on this recording IS Delbert McClinton. Only on the Mr. Pitiful track is there any similarity at all to Delbert's voice. This is the perfect CD for someone with money to burn who would like to have a recording of a 1960's garage band recorded in a garage and who should be working in a garage ! Delbert fans...run fast and run far away from this ! END",
|
144 |
-
"id": "27"
|
145 |
-
},
|
146 |
-
"28": {
|
147 |
-
"label": true,
|
148 |
-
"text": "This item is not available: I ordered this unit on February 7th. Every time I checked back on the status of the order, it read \"not shipped\" and the estimated shipping date got moved out. I really don't think this unit is avaialble from the company anytime soon. I cancelled the order.",
|
149 |
-
"id": "28"
|
150 |
-
},
|
151 |
-
"29": {
|
152 |
-
"label": true,
|
153 |
-
"text": "I used to like ABBA...: I used to like ABBA, until I saw Mama Mia! A horribly disjointed musical, where songs feel contrived to fit into the story; a story that doesn't seem to come together. Individual songs are usually done alright, but don't segue from one to another very well.The cast butchered several of the songs, but especially S.O.S, Take A Chance On Me, and anything where Pierce Brosnan sang. On a side note, I also counted at least two violations of Chekov's Gun. And finally, I think it has a bad moral message. Which you only recognize if you manage to sit through the whole thing.If there is justice in the world, cast members without established careers won't get to have them as punishment for the worst movies I've seen since The Talented Mr. Ripley.",
|
154 |
-
"id": "29"
|
155 |
-
},
|
156 |
-
"30": {
|
157 |
-
"label": true,
|
158 |
-
"text": "A complete disaster!: If you're like me, you probably wanted to check out this movie because it sounded like it really could be an excellent supernatural Gothic horror tale full of goblins and wicked things alike. Well, don't make the same mistake I did and actually watch it. It's horrible. Terrible. An honest to goodness waste of film. The acting is wretched, the film quality is rotten (it actually looks twenty years older than it is), and the plot is thin, weak, and does not give you what it's supposed to. The only reason I bothered to give this film 1 star is because of Alexis Arquette -- he's great looking, but should have left this film out of his career.",
|
159 |
-
"id": "30"
|
160 |
-
},
|
161 |
-
"31": {
|
162 |
-
"label": false,
|
163 |
-
"text": "beautiful detail: I just purchased these Dover COloring Books for my mother and she loves them. The detail is out of this world and the variety of colors you can use are only limited by your inagination. HIGHLY RECOMMENDED!",
|
164 |
-
"id": "31"
|
165 |
-
},
|
166 |
-
"32": {
|
167 |
-
"label": true,
|
168 |
-
"text": "Very disappointed: I looked forward to getting this movie as I had heard many good things about it but it was nothing like I had imagined or been led to believe. There is very little actual history in it or real Christian experience except for the background because the main focus is a soap opera style romance and caricature figures. I agree with the reviewer who described it as a mixture of \"tawdry Hollywood sex\" somehow interspersed with a vague nod to Christianity. The only decent scene was the arena scene where the Christians are going to their deaths singing hymns - but that's not enough to make it a great or even a good movie. Not personally to my taste anyway.",
|
169 |
-
"id": "32"
|
170 |
-
},
|
171 |
-
"33": {
|
172 |
-
"label": true,
|
173 |
-
"text": "Unreliable minikit: I bought this minikit because it got good reviews and it would be perfect for my purposes. However it switches on and off whenever it wants, it looses contact with the phone. Very often the on/off button works only in a horizontal position (?) I use a Treo 650, which is on the compatible phone list. When I contacted Parrot, they said it wasn't (?) At last I opened the unit, but there are no moving parts inside except the micro switches. It is giving me a headache, so I will go searching for an alternative.",
|
174 |
-
"id": "33"
|
175 |
-
},
|
176 |
-
"34": {
|
177 |
-
"label": false,
|
178 |
-
"text": "A Christmas Classic!: This is surely one of the best classical Christmas recordings available. Don't buy the older version, as the quality of this recording is excellent. This is one of those \"Every Christmas - Can't have Christmas without\" recordings.",
|
179 |
-
"id": "34"
|
180 |
-
},
|
181 |
-
"35": {
|
182 |
-
"label": true,
|
183 |
-
"text": "too narrow: These were the narrowest pair of D size shoes I have ever tried on. I don't care how nice a shoe looks. If it don't fit it just don't fit.",
|
184 |
-
"id": "35"
|
185 |
-
},
|
186 |
-
"36": {
|
187 |
-
"label": true,
|
188 |
-
"text": "Lack of extension: This earphones lack a descent extension cord. ITs very small cable, but its of good quality. Sadly, cord its too short, and the extension is useless.",
|
189 |
-
"id": "36"
|
190 |
-
},
|
191 |
-
"37": {
|
192 |
-
"label": false,
|
193 |
-
"text": "Easy-Reading: This is the 3rd Southern Sisters Mystery I've read. They're easy, fast and funny murder mysteries, with lots of cute family stories intertwined in the intrigue.",
|
194 |
-
"id": "37"
|
195 |
-
},
|
196 |
-
"38": {
|
197 |
-
"label": true,
|
198 |
-
"text": "it'd be great if it worked like it was supposed to: for the first 30 seconds it was lovely, but i believe that either the motor isn't powerful enough to keep the shaft rotating smoothly or 3 AA batteries just don't provide enough juice for the motor to work more than 30 seconds. it was a nice idea, but i'm rather dissapointed. the jelly material is somewhat difficult to maintain also. i think if it were hooked up to a larger battery pack it'd be WONDERFUL... which i think i may have a macgyver friend with a knack for electronics attempt to do for me.",
|
199 |
-
"id": "38"
|
200 |
-
},
|
201 |
-
"39": {
|
202 |
-
"label": false,
|
203 |
-
"text": "Not Hornby's best but still good: I loved About a Boy and really, really loved the sardonic wit of High Fidelity. About a Boy is much deeper but just as cynical. Maybe even more so. The characters are richly drawn and just complex enough to keep the reader wanting more. Good read, but best to take some time with this one. Not recommended for a summer beach read.",
|
204 |
-
"id": "39"
|
205 |
-
},
|
206 |
-
"40": {
|
207 |
-
"label": true,
|
208 |
-
"text": "A Disappointment: As with most Taunton Press publications, the illustrations and photographs in this book are spectacular and the organization and layout is superb. Nonetheless, I found this book disappointing. It lacks both depth and breadth. I had hoped for a detailed review of wood joinery including some of the more unusual joinery found in Japanese woodworking. This book, however, is targeted more toward the beginner. Even so, it does not cover the details and \"tricks\" of even the most basic techniques in sufficient detail to allow beginners to easily reproduce them. Consequently, it is unclear who this book was written for - not the beginner as it lacks depth, and not the advanced woodworker as it lacks breadth. Far more effort appears to have been put into appearance and organization than in content.",
|
209 |
-
"id": "40"
|
210 |
-
},
|
211 |
-
"41": {
|
212 |
-
"label": true,
|
213 |
-
"text": "Horrible. Don't do it!: Great price for the item when a 6' one of these at Best Buy is $20. Thing is, the one from Best Buy fits in the outlet and stays there. This cord fits very loose and does not connect. I bought 2 of them, neither did what they were suppose to.As much as I hate to say it, but, buy the more expensive one. At least it works.",
|
214 |
-
"id": "41"
|
215 |
-
},
|
216 |
-
"42": {
|
217 |
-
"label": false,
|
218 |
-
"text": "Given as a gift...: Given to my best friend as a gift. She loves it. Her fiance enjoys making coffee for her in the mornings. :)",
|
219 |
-
"id": "42"
|
220 |
-
},
|
221 |
-
"43": {
|
222 |
-
"label": false,
|
223 |
-
"text": "Love the ring.: This is a nice ring. I was worried it out be thin and cheap looking, but it's not. It's a very pretty stylish ring. Go for it.",
|
224 |
-
"id": "43"
|
225 |
-
},
|
226 |
-
"44": {
|
227 |
-
"label": true,
|
228 |
-
"text": "Beautiful writing Marred by One-Note Characterizations: How could Kingsolver have ruined her book with such an obvious error? Nathan is a strident paper doll that flattens the whole story. Just as bad, the author has all the narrators using the same ironic tone to decribe him, deadening their voices as well. At the same time, Kingsolver doesn't have the guts to show him doing something trully terrible. I don't trust an author who can't let the reader make up his own mind, and as a consequence I couldn't trust her views about ANYTHING in the story. I'm giving this two stars for her descriptions of the African landscape, and that is all.",
|
229 |
-
"id": "44"
|
230 |
-
},
|
231 |
-
"45": {
|
232 |
-
"label": true,
|
233 |
-
"text": "Much worse than any cordless phone I've ever had: This phone cuts out only 2 rooms away from the base station. There is static noise, and callers on the other end complain about sound quality. I can't go into the garden, which used to be no problem with my old 900 MHz phone.",
|
234 |
-
"id": "45"
|
235 |
-
},
|
236 |
-
"46": {
|
237 |
-
"label": true,
|
238 |
-
"text": "Waste of time & money: The first Hangover was not too bad, this one was just terrible. The acting is bad, the script is bad, everything about this movie was just bad. Do yourself a favor, don't buy this movie as it is a total waste of time and money.",
|
239 |
-
"id": "46"
|
240 |
-
},
|
241 |
-
"47": {
|
242 |
-
"label": true,
|
243 |
-
"text": "Did Not Work For Me!: Impressive You Tube Video (Like a Sci-Fi Fantasy). In reality it's a high speed Easy Out so unsurprisingly it broke faster than an Easy out. This product did not work for me. The drill part did not drlil, the puller part did not pull. It was a total zero.",
|
244 |
-
"id": "47"
|
245 |
-
},
|
246 |
-
"48": {
|
247 |
-
"label": false,
|
248 |
-
"text": "Excellent book, long overdue.: From a very long time women were told that looking good was of utmost importance. This was without regard to health or fitness and how age affected these parameters. Witness the whalebone and other types of corsets, the spike heeled shoes and the numerous weight loss programmes on the market (some of which are downright dangerous). Now there is a book, backed by solid research, that allows women of all ages to remain fit and healthy for a lifetime. I am certainly going to recommend this book to all the women I know.Bentley Norville",
|
249 |
-
"id": "48"
|
250 |
-
},
|
251 |
-
"49": {
|
252 |
-
"label": true,
|
253 |
-
"text": "not an all star: Not a practical guide in this collecting age. Does NOT have a comprehensive list; meaning it does NOT cover all manufacturers and, more importantly, for the ones it does, only provides listings of the base set. That means no insert or variation pricing whatsoever. Also, no oddball or minor league issues are listed. Generally speaking, unless you are collecting base sets prior to the advent of inserts and alternate versions of the base set, this guide is fairly useless.",
|
254 |
-
"id": "49"
|
255 |
-
},
|
256 |
-
"50": {
|
257 |
-
"label": true,
|
258 |
-
"text": "Again, second rate city, third rate writer: Just another example of Mr. Lindberg's pitiful attempt at exhibiting a strong expertise on a subject with which he is clearly obsessed. Don't waste your time with this book, either. It is poorly written and fails to engage the reader. You might consider using this book and the first book he wrote on the same subject, as a pair of bookends. That is about all they are worth.",
|
259 |
-
"id": "50"
|
260 |
-
},
|
261 |
-
"51": {
|
262 |
-
"label": false,
|
263 |
-
"text": "Reality: No one should need to convince you to buy this book, you should just do it! It's so well written and worded and brings you right to the heart of a sexual reality that most people like to pretend doesn't really live and breath in their fair cities. I never again want to hear someone bad mouth a working girl for what she does. I will and do now however look at men with a curious eye wondering if they are depraved peep show window lickers :)",
|
264 |
-
"id": "51"
|
265 |
-
},
|
266 |
-
"52": {
|
267 |
-
"label": true,
|
268 |
-
"text": "Bummer: Visual effects and Battle footage were great...the other 85% of the movie was just lousy fluff...",
|
269 |
-
"id": "52"
|
270 |
-
},
|
271 |
-
"53": {
|
272 |
-
"label": false,
|
273 |
-
"text": "The spark of idependence: Filled with the independent spark that made us all love life at one point or another. A fun, introspective and nonsensical movie that sticks with you.",
|
274 |
-
"id": "53"
|
275 |
-
},
|
276 |
-
"54": {
|
277 |
-
"label": false,
|
278 |
-
"text": "What I expected from Mirman's website. Funny. Funny. Russian.: lol, gotta love Eugene. Even when his audience doesn't initially laugh, he gets in a good zinger at himself and they laugh at that. He's witty without being condescending, and uncomplicated without seeing contrived. However, if you're not a fan of irreverant humor, this may not be for you.",
|
279 |
-
"id": "54"
|
280 |
-
},
|
281 |
-
"55": {
|
282 |
-
"label": true,
|
283 |
-
"text": "Do not...repeat...do not bother!: It is not often that I offer a negative review but this compilation while attractive does not deliver at all.The foot massage gizmo is awkward and uncomfortable.The pumice stone leaves rough splinter like skin.The foot scrub doesn't reall scrub.The rotary action tool has five heads, none of which work well and you must hold the switch in place or it turns off. It is cumbersome and ineffective.The one star was initially given for a foot brush (which later lost its bristles very easily as I update the review) and a sweet smelling foot repair balm.Don't waist your money. Soak your feet and invest in an inexpensive German Titania file, smooth and coarser side, or a like product. It will last for years.",
|
284 |
-
"id": "55"
|
285 |
-
},
|
286 |
-
"56": {
|
287 |
-
"label": true,
|
288 |
-
"text": "Not Sandra's Best: Ms. Brown has written better romance novels. Don't give up on her if this was your first Sandra book.The feeble female lead struggles with a 15-year crush that walks back into her life. The smug male lead acts like a jerk through most of the novel. The romance scenes grapple to muster up passion but fall short. Both of the main characters bothered me; my favorite character was the 17-year old.A quick read...about 4 hours (with interruptions) for me...but probably not worth it.",
|
289 |
-
"id": "56"
|
290 |
-
},
|
291 |
-
"57": {
|
292 |
-
"label": false,
|
293 |
-
"text": "Impressed: Lots-O-Fun. Wood and glass toys are high quality and are a good fall back for the kids to play with they are \"bored\". Would buy again.",
|
294 |
-
"id": "57"
|
295 |
-
},
|
296 |
-
"58": {
|
297 |
-
"label": true,
|
298 |
-
"text": "Light turned on by itself 3 times: The installation was easy. I used it for a week, everything worked fine, EXCEPT the light it connected to turned on by itself 3 times so far, with no one near to either one of the switch. Not sure whether it is a defective unit, or this product is too sensitive to noise. I'm returning this product and will just install a regular switch instead.",
|
299 |
-
"id": "58"
|
300 |
-
},
|
301 |
-
"59": {
|
302 |
-
"label": false,
|
303 |
-
"text": "good battery: I feel kind of silly writing a review for a battery, but have to say that these last a LONG time. Work very well.",
|
304 |
-
"id": "59"
|
305 |
-
},
|
306 |
-
"60": {
|
307 |
-
"label": false,
|
308 |
-
"text": "Even a Woman finds it funny: Yes, even a woman finds \"Married to Mommy\" funny. The book gets you laughing aloud when it is trying to make fun of \"Mommies\". The truth is that it really is making fun of the stupidity of men and their simple basic needs of sex, getting out of work, and beer. Of course, the truth is always funny.A definite MUST for any woman, married or not. We will now know all the secret tricks the men try to use on us.By the way, I am NOT a MOMMY!",
|
309 |
-
"id": "60"
|
310 |
-
},
|
311 |
-
"61": {
|
312 |
-
"label": false,
|
313 |
-
"text": "Gungrave...not quite what you might expect: Those thinking this is another version of Trigun will be disappointed. Gungrave is actually a lot deeper and more complex. The lead is short on dialouge, but the story has more depth and character development than most anime. The first DVD is more about the main character's past than about the reanimated killing machine he's become, but it definitely leaves you wanting more.",
|
314 |
-
"id": "61"
|
315 |
-
},
|
316 |
-
"62": {
|
317 |
-
"label": false,
|
318 |
-
"text": "Error in product description: It's great in every way. However, if you'd prefer a digital tuner (as I do), then you might need to look further. The product description boasts a digital AM/FM tuner, but it's disappointingly an analog AM/FM tuner.Overall - especially for the price - I think it's pretty good.",
|
319 |
-
"id": "62"
|
320 |
-
},
|
321 |
-
"63": {
|
322 |
-
"label": false,
|
323 |
-
"text": "good phone but not as user friendly as it could be: Battery life is very good. Phone has good range. My only complaint is it's to involved to get your message from the handset.",
|
324 |
-
"id": "63"
|
325 |
-
},
|
326 |
-
"64": {
|
327 |
-
"label": true,
|
328 |
-
"text": "Big waste of money (and space in my house!): My 5 year old son wanted this so bad, but when we got it for him, there were so many pieces to put together that didn't fit together well, he never played with it. It just sits on our floor in many pieces taking up toy space! What a waste!",
|
329 |
-
"id": "64"
|
330 |
-
},
|
331 |
-
"65": {
|
332 |
-
"label": false,
|
333 |
-
"text": "Don't want to take it off: Very satisfied with an earlier purchase of this Bali bra model, I was just as pleased with the new one. Very comfortable, well made and a good neutral color. It will be my next choice, too.",
|
334 |
-
"id": "65"
|
335 |
-
},
|
336 |
-
"66": {
|
337 |
-
"label": false,
|
338 |
-
"text": "Fantastico: If anybody who's into rock music is ever looking for a band to keep you on your toes, this is the band. I've been a fan for 10 years now, and no album has ever sounded like any of their previous albums. This disc is fantastic with such a variety of styles, as are the previous releases, even back to the Rainbow Butt Monkey days.",
|
339 |
-
"id": "66"
|
340 |
-
},
|
341 |
-
"67": {
|
342 |
-
"label": true,
|
343 |
-
"text": "too much visual: There are far too much designs, visuals, colors, etc in the book - this is highly distracting, as TV screen can be...By way of example (among so many...), what is the use of colors with the three squares of the Pyth. theorem???? this is as useless as writting 2+3=5 with 2 in blue, 3 in red and 5 in yellow...I wish I had purchased the 2nd edition, which according to reviews was closer to what I was looking for.",
|
344 |
-
"id": "67"
|
345 |
-
},
|
346 |
-
"68": {
|
347 |
-
"label": false,
|
348 |
-
"text": "Aretha's First Arista Release Showed Pleasures to Come: After a long and musically satisfying career with Atlantic, Aretha severed her ties with that company and moved under the wing of Arista's Clive Davis. With the start of the 1980's, Aretha was looking for new territory to conquer and almost succeeded with this mixed bag.\"United Together\" is a fine tune that benefits from beautiful orchestral arrangement that is matched by Aretha's superb vocal instrument. The remake of \"Can't Turn You Loose\" allows Aretha to show why she is the Queen of Soul\" for she really belts this one out. Another cover, that of the Doobies' \"What a Fool Believes,\" is an interesting interpretation. The final cut \"School Days\" appears to be \"autobiographical\" for every girl growing up in the fifties.Although not as strong as her Atlantic work, \"Aretha\" is still a suitable addition to the artist's discography.",
|
349 |
-
"id": "68"
|
350 |
-
},
|
351 |
-
"69": {
|
352 |
-
"label": true,
|
353 |
-
"text": "Misguided Purchase: The photo and description do not reflect the product. The screen panel kit I received was white. What a huge inconvenience during a time-crunch.",
|
354 |
-
"id": "69"
|
355 |
-
},
|
356 |
-
"70": {
|
357 |
-
"label": true,
|
358 |
-
"text": "Banacek: My husband and were looking forward to seeing this series.The first show was SO boring, we finally just quit watching it.Actually, we haven't gotten around to watching anymore. I guess we were afraid of a repeat.Maybe that was just once, I hope!",
|
359 |
-
"id": "70"
|
360 |
-
},
|
361 |
-
"71": {
|
362 |
-
"label": false,
|
363 |
-
"text": "JDT: Uncle Tupelo is without doubt one of the most under appreciated groups of the 90's. Anodyne, like each of the three albums that came before it, has everything that a remarkable recording requires: great songs, honest lyrics, and artists who really care about the music they are making. Like the best of Dylan and Springsteen, the songs are about real people with real troubles and joys. When you hear them you know they are coming from the heart. The songs contributed by Jay Farrar and Jeff Tweedy are easily differentiated by the voacls, music, and lyrics. What makes this record interesting is how well these unique sounds compliment each other. The union is seamless.",
|
364 |
-
"id": "71"
|
365 |
-
},
|
366 |
-
"72": {
|
367 |
-
"label": false,
|
368 |
-
"text": "Well Worth Reading: First a confession: Miriam Wasserman was my mother. However, she published several books, but this is the only one I really found useful. She walks the reader through the New York City school system and the attitudes of different groups involved in the system back in the 1960s. This includes parents, teachers and administrators. Her view is that the further away one got from parents and students, the more prestige one had. She meticulously describes the teachers' strike of 1968 against \"community control of schools\", a strike of which she is extremely critical. She explores the racism that was involved in this strike, including using quotes from striking teachers, etc. It should be emphasized that the author was pro-union all her life, so her views don't stem from an anti-union bias. The book also covers the high school student rebellion which coincided with and followed the strike.",
|
369 |
-
"id": "72"
|
370 |
-
},
|
371 |
-
"73": {
|
372 |
-
"label": false,
|
373 |
-
"text": "compact and loaded: I bought this phone after reading the cnet reviews and really liked it. It looks small and really compact. I like the camera pics at 2 mega pixel and bright flash. The mp3 player is crisp. The headset that comes along delvers amazing fM radio. I think my phone is not very loud and you have a problem when you are around a noisy crowd. I just bought this phone again for my cousin. He likes it too. Almost forgot the display is very good.",
|
374 |
-
"id": "73"
|
375 |
-
},
|
376 |
-
"74": {
|
377 |
-
"label": false,
|
378 |
-
"text": "Outstanding text!: Brooks/Cole should keep this text in their catalog for ages! It is well-written, examples are generally quite clear, vocabulary is introduced well, and the exercises develop real skills, rather than simply be busy-work. One of the best calculus books ever!",
|
379 |
-
"id": "74"
|
380 |
-
},
|
381 |
-
"75": {
|
382 |
-
"label": false,
|
383 |
-
"text": "Excel 2003 Bible: Very good source of information. I will most likely buy other books in this series.",
|
384 |
-
"id": "75"
|
385 |
-
},
|
386 |
-
"76": {
|
387 |
-
"label": false,
|
388 |
-
"text": "Tasting is Believing: Gluten-free breads used to have a gritty texture from the rice flour, and were too soft for sandwiches. Bette Hagman uses garbanzo/fava bean flour, sorghum flour, tapioca flour, and corn starch to create breads which have a similar texture to wheat flour breads, and the flavors of her breads are fabulous.My BF bought me this book and a great tasting beverage to drink it with. Since he knows I quit coffee recently, he's been really wonderful helping me in cope with my mood swings. S o y f e e is made from soy beans that is roasted just like coffee. I enjoy the taste and don't miss coffee one bit. Buy it online at www.s o y c o f fee.com.This is a 'must have' for anyone baking gluten-free. I think all of Bette Hagman's books are wonderful and a must for those with gluten intolerance.",
|
389 |
-
"id": "76"
|
390 |
-
},
|
391 |
-
"77": {
|
392 |
-
"label": false,
|
393 |
-
"text": "5 stars for the show, no stars for the \"Collector's Edition\": I was really looking forward to getting this Collector's Edition and see what extras were added. I knew it wasn't a lot - just a mini-book and a documentary - but I figured it would be packaged in a cool way.Wrong.As others have already mentioned, the Collector's Edition is *literally* theAvatar: The Last Airbender - The Complete Book 1 Collectionslipped into another cardboard box, with a little booklet and DVD in an envelope (not even a case!) wedged in. It's really disappointing; it would have been so easy to create a quality Collector's Edition but the studio couldn't be bothered, I guess.",
|
394 |
-
"id": "77"
|
395 |
-
},
|
396 |
-
"78": {
|
397 |
-
"label": false,
|
398 |
-
"text": "sula scottcampos: Sula, a book that talks about the issues of being a black women is a really good novel to read.One of the reasons I recommend it is because of its realism and its themes - death, sex, friendship and poverty.I also think that its characters are very good, its easy to identify with one or both of them. I really recommend this book to anyone who enjoys good literature.",
|
399 |
-
"id": "78"
|
400 |
-
},
|
401 |
-
"79": {
|
402 |
-
"label": false,
|
403 |
-
"text": "Fantastic! It's a must-have for girls!: I hated razor, tried shaving but it did not work for me. Shaving made the hair grows thicker and faster afterwards, plus the roots are impossible to be getting rid of. After reading the reviews, I ordered it to try, I used it for once and already fall in love with this. I used to use small tweezer to pluck out my leg's hair, in order to avoid the razor, it took me a few hours to do that but this super electronic tweezer works wonder! You won't see the black roots and I have smooth and silkly legs in 20 mins. It does not hurt at all, if you use it on your legs. But, if you use it at your under arm, it won't be a pleasant feeling, of course! I will never use anything else besides this for hair removing anymore! highly recommended!",
|
404 |
-
"id": "79"
|
405 |
-
},
|
406 |
-
"80": {
|
407 |
-
"label": true,
|
408 |
-
"text": "This is not a toy: I guess I was expecting more out of these leave window decals. I just didn't find them attractive after placing them on my window, they seem very cheap, I guess because they are cheap.I threw them away.",
|
409 |
-
"id": "80"
|
410 |
-
},
|
411 |
-
"81": {
|
412 |
-
"label": false,
|
413 |
-
"text": "Wonderful book for anyone running a professional hatchery: This book is aimed more for hatcheries that are raising Trout, Salmon, Catfish and other food fishes. However, there is so much information in this book that even ornamental fish hatcheries will find an incredible amount of useful information. The chapters on Fish Nutrition are especially helpful.",
|
414 |
-
"id": "81"
|
415 |
-
},
|
416 |
-
"82": {
|
417 |
-
"label": false,
|
418 |
-
"text": "Amazing book!!: Once again, Eric Victorino's artistic talent is put into this great free-verse poetry book. I couldn't put it down and I finished it the day I received it in the mail. All of the poems are awesome but the one I found the most interesting was \"It's A People Business.\" All of the experiences in his life, personally and with his band, come to life in this book. Please check it out! It's worth every penny!!",
|
419 |
-
"id": "82"
|
420 |
-
},
|
421 |
-
"83": {
|
422 |
-
"label": false,
|
423 |
-
"text": "The white trumpet contender respect Miles Davis!: The story of the Jazz in the Fifties certainly would be remain unfinished without the ominous presence of this outstanding virtuoso. Baker sound still possesses this alluring hook, this magnetic engagement charm, eloquent expressiveness, enrapturing lyricism and contagious rhythm, despite the elapsed time, which confirms by itself the status of his musicianship.This selection is jus a little sample of the broad universe of his genius. A well thought selection of great musical successes, available, preserved and immortalized by the Digital Technology for our future enjoyment.Absolutely indispensable in your treasured collection.",
|
424 |
-
"id": "83"
|
425 |
-
},
|
426 |
-
"84": {
|
427 |
-
"label": true,
|
428 |
-
"text": "What the?: I'm sorry, maybe it's just me but I can't helping stating that this has to be one of the wrost movies I've seen in my life!Can you say boring? Can you say doesn't make sense at all? The first 30 minutes of the movie were O.K. But it went downhill after that. This movie is a prime example of a director attempting to make a deep movie with a meaningful lesson but failed on all levels. I don't recommend this movie unless you want to go to sleep or you don't have anything else to do.",
|
429 |
-
"id": "84"
|
430 |
-
},
|
431 |
-
"85": {
|
432 |
-
"label": false,
|
433 |
-
"text": "very very good!!!!: linda blair is a young girl who is possessed. and her mother doesn't know what to do until one day when she hears her daughter screaming and stabbind herself she knows what to do GET AN EXORCIZIM!!!",
|
434 |
-
"id": "85"
|
435 |
-
},
|
436 |
-
"86": {
|
437 |
-
"label": false,
|
438 |
-
"text": "Awesome product for the price!: This range extender works as advertised! I am very happy with the purchase. I was a little worried after reading some of the horror stories here, but I have to say, Chovy's review instructions (on this site) were just this ticket to get the repeater up and running in less than 30 minutes. It was unbelievably easy to install! Do not be frightened by negative reviews. If you can set up a wireless network, you can set up this repeater. However, I did upgrade the firmware before I did anything else and maybe that helped. I got the firmware update from the Belkin site.",
|
439 |
-
"id": "86"
|
440 |
-
},
|
441 |
-
"87": {
|
442 |
-
"label": true,
|
443 |
-
"text": "Slight: This book is either a heavily illustrated short story collection or a text-heavy comic. Its unusual format is its most original feature. Its plots are negligible, but its illustrations and text evoke a unique atmosphere of self-conscious nonconformism. Although its target audience is dare-to-be-different teens and college students, its interesting turns of phrase and expressive line drawings are not devoid of interest for general audences.",
|
444 |
-
"id": "87"
|
445 |
-
},
|
446 |
-
"88": {
|
447 |
-
"label": false,
|
448 |
-
"text": "ANgeleyes: Seem to dry up their eyes fairly well, although I haven't seen the color (brown stain) change much yet.",
|
449 |
-
"id": "88"
|
450 |
-
},
|
451 |
-
"89": {
|
452 |
-
"label": true,
|
453 |
-
"text": "Nice Try: Salt Lake 2002 is not a bad game, but it isn't good either. The graphics are excellent, but some of the events are bad. Bobsleigh, and skiing aren't bad but the others are. You dont stay into it for long. I liked it for a while, but it gets boring.",
|
454 |
-
"id": "89"
|
455 |
-
},
|
456 |
-
"90": {
|
457 |
-
"label": true,
|
458 |
-
"text": "Cutler's share of the pie: This book was a major disappointment. I am familiar with books written solely by the Dalai Lama, such as the \"Library of Tibet\" series, which are much more engrossing and have much more substance than Cutler's book. Cutler attempts (successfully, sadly) to have his share of the profitable market that involves the Dalai Lama's writings. The book is insipid, does not try to explain any important issue in the light of Buddhist philosophy, and only rehashes issues that several other westerners already wrote about. It's another big ego trip: we keep hearing time and again about his opportunities to be with the Dalai Lama. What a shame, Cutler. I sold the book as soon as I finished it.",
|
459 |
-
"id": "90"
|
460 |
-
},
|
461 |
-
"91": {
|
462 |
-
"label": true,
|
463 |
-
"text": "Mostly tedious, with interesting parts: I found the writing interesting, and the subject fascinating, but I found myself frustrated by the author's difficulty in talking directly about the status of Muslim women with her interview subjects. The author spent many pages writing about the menus and dress of the many middle and upper-middle class women she interviewed. It seemed as though her interview subjects resisted her efforts to discuss the status of women in their countries, so we too as readers had to wade through much distracting material and misunderstandings about feminism and gender. Great travel stories, but not a great source of information about Muslim women.",
|
464 |
-
"id": "91"
|
465 |
-
},
|
466 |
-
"92": {
|
467 |
-
"label": true,
|
468 |
-
"text": "Sesame Street Toddler: I did not find this game to be as educationally sound as I would expect from Sesame street. There is too much talking before the program will react to a command. The graphics are jerky and the cursor acts like the target is magnetically charged and keeps pushing away the cursor. When the child actually does manage to click on a target, the cursor may still fly to another target and the child is told that his answer is wrong. Another example of educational problems is the pronunciation of \"eggs\" using a long \"a\" sound instead of a short \"e.\" This is not very helpful in teaching a child the sound for short \"e.\" Children that are used to playing computer games by themselves may find that this game is too frustrating to do alone. The open ended learning curve is a great idea. I just wish Sesame Street would hire a truly qualified literacy expert to help clean up the many problems in this program.",
|
469 |
-
"id": "92"
|
470 |
-
},
|
471 |
-
"93": {
|
472 |
-
"label": true,
|
473 |
-
"text": "needs a buzz cut and a point: I avoided reading this book, not because of the hermaphrodite subject matter, but because I have never read a multigenerational family saga that I liked. Many books let me down in the middle, and this was no exception. The beginning of the book was incredible and harrowing, with momentum and characterization. The post-America nextgens part of the saga was so boring I found myself flipping and flipping - always a bad sign. If there was some kind of larger point to all of that, then I must have missed it. Yes there's the identity duality and trinity themes playing out here: man/woman, greek/turkish/american modern/old world sick/healthy innocent/guilty original/reinvented. But it was almost as if the author was saying - here it is again - get it? I like my fiction much more subtle than this.",
|
474 |
-
"id": "93"
|
475 |
-
},
|
476 |
-
"94": {
|
477 |
-
"label": true,
|
478 |
-
"text": "OMG! DO NOT BUY!: I normally don't take the time to submit a review.In this case however, I feel obligated to do so.This is by far one of the worst purchases I have ever made.Here's why.....The contraption is far too bulky.The case's enclosing is unbearable, takes a good minute or so to open it.The texture of the material feels like a cheap toy.The overall design is horrible, something I could make in my basement.For the love of everything sacred, do not buy this thing.",
|
479 |
-
"id": "94"
|
480 |
-
},
|
481 |
-
"95": {
|
482 |
-
"label": false,
|
483 |
-
"text": "Good price, good quality: Comparable HDMI cables can be bought for 45 or more. Even though the price is cheap the quality is good, no problems so far.",
|
484 |
-
"id": "95"
|
485 |
-
},
|
486 |
-
"96": {
|
487 |
-
"label": false,
|
488 |
-
"text": "Good rock music: This is what i call rock music good beat and good lyrics, don't listen to the other reviews. This cd is one of the best, listen to a few songs and you will get hooked. I recommend this cd its awesome.",
|
489 |
-
"id": "96"
|
490 |
-
},
|
491 |
-
"97": {
|
492 |
-
"label": true,
|
493 |
-
"text": "BORING!: This movie is soo boring. How in the hell did this movie make so much at the box office. Do people really want to pay for crappy movies like this. bottom line this is a chick flick nothing is good. And now they are re-releasing this movie with more boring stuff. This is the worst movie ever.",
|
494 |
-
"id": "97"
|
495 |
-
},
|
496 |
-
"98": {
|
497 |
-
"label": true,
|
498 |
-
"text": "Already Rusting: Inferior quality. The plating is thin and rust is coming through the finish. Inexcusable for a product that is designed for use in a humid environment.",
|
499 |
-
"id": "98"
|
500 |
-
},
|
501 |
-
"99": {
|
502 |
-
"label": true,
|
503 |
-
"text": "confusing internet setup: i wanted a camera that could email photos but this camera will not go out through the router and the manual setup , to punch a hole thru router is confusing.",
|
504 |
-
"id": "99"
|
505 |
-
},
|
506 |
-
"04c7dfc0f94e4e88968d09b40edbfa14": {
|
507 |
-
"label": true,
|
508 |
-
"text": "The new gaming console is unaffordable.",
|
509 |
-
"id": "04c7dfc0f94e4e88968d09b40edbfa14"
|
510 |
-
},
|
511 |
-
"58f58a1a4cbb4bb699772ed934006ec8": {
|
512 |
-
"label": true,
|
513 |
-
"text": "How can it be sure difficult for @115830 to deliver a package to a University address? Two failed attempts so far ...",
|
514 |
-
"id": "58f58a1a4cbb4bb699772ed934006ec8"
|
515 |
-
},
|
516 |
-
"d4a3cd4877c54aef81c376eff8008df4": {
|
517 |
-
"label": false,
|
518 |
-
"text": "@204780 Glad they showed up! Hope you have a great flight! -Sean",
|
519 |
-
"id": "d4a3cd4877c54aef81c376eff8008df4"
|
520 |
-
},
|
521 |
-
"affe1d6548f84bed84238bac45cc10a1": {
|
522 |
-
"label": false,
|
523 |
-
"text": "@British_Airways Thank you! All looks good then \ud83c\uddec\ud83c\udde7\u2708\ufe0f",
|
524 |
-
"id": "affe1d6548f84bed84238bac45cc10a1"
|
525 |
-
},
|
526 |
-
"e304ea77a94c450a95690c7b605a035f": {
|
527 |
-
"label": false,
|
528 |
-
"text": "@246667 Thank you for reaching out, Andrea. The built in application in Windows 10 are exempted to be uninstalled. However, you can send this suggestion directly to our developers via the Feedback Hub so they can take a look at it: https://t.co/jowrfbgQm6. Keep in touch.",
|
529 |
-
"id": "e304ea77a94c450a95690c7b605a035f"
|
530 |
-
},
|
531 |
-
"76b694b019eb4e6888a422e144030bd0": {
|
532 |
-
"label": true,
|
533 |
-
"text": "@GWRHelp It\u2019s mainly the constant short forming and cancellations due to mechanical faults Phil. As a company, these excuses have been used ad nauseam for years and years. It just gets worse and no amount of rhetoric and IET self promotion can hide that fact.",
|
534 |
-
"id": "76b694b019eb4e6888a422e144030bd0"
|
535 |
-
},
|
536 |
-
"ce0698020b7a457396c7674b04db10e6": {
|
537 |
-
"label": false,
|
538 |
-
"text": "English gangster flick.",
|
539 |
-
"id": "ce0698020b7a457396c7674b04db10e6"
|
540 |
-
},
|
541 |
-
"52bda6cbab224899845e66e0474cdefc": {
|
542 |
-
"label": false,
|
543 |
-
"text": "sees the formula graph, the chip calculates the formula, able to \"survive\" thanks to its connection to Edit, develops a parallel personality and affords her abilities greater than she ever imagined...",
|
544 |
-
"id": "52bda6cbab224899845e66e0474cdefc"
|
545 |
-
},
|
546 |
-
"435aabe68c294963a05e090d479582bc": {
|
547 |
-
"label": false,
|
548 |
-
"text": "Aanandam is a 2016 Indian Malayalam campus musical film written and directed by Ganesh Raj in his directorial debut. Vineeth Sreenivasan produces the film under the banner of Habit Of Life with Vinod Shornur under Cast N Crew.",
|
549 |
-
"id": "435aabe68c294963a05e090d479582bc"
|
550 |
-
},
|
551 |
-
"f96313d0087e4941a359783634ef9e86": {
|
552 |
-
"label": false,
|
553 |
-
"text": "The remarkable story of The Weather Underground, radical activists of the 1970s, and of radical politics at its best and most disastrous.",
|
554 |
-
"id": "f96313d0087e4941a359783634ef9e86"
|
555 |
-
},
|
556 |
-
"f63e4502791a409fa2d750687d3841eb": {
|
557 |
-
"label": false,
|
558 |
-
"text": "A young widow on a trip to the backwoods stumbles upon the operation of a gang of drug smugglers. They attempt to kill her in order to keep their operation a secret, but she turns out to be more resourceful than they thought, and starts to turn the tables on them.",
|
559 |
-
"id": "f63e4502791a409fa2d750687d3841eb"
|
560 |
-
},
|
561 |
-
"108ac02949324b02bdcbe4c7a77bacdc": {
|
562 |
-
"label": false,
|
563 |
-
"text": "The story of a young Marine, fresh from Camp Pendleton, who is forced to confront the complexities of adulthood and a volatile home life during a four-day Thanksgiving leave.",
|
564 |
-
"id": "108ac02949324b02bdcbe4c7a77bacdc"
|
565 |
-
},
|
566 |
-
"44fc412246964b2393fa0035ff093a00": {
|
567 |
-
"label": false,
|
568 |
-
"text": "Exploring the rough and tumble world of hockey, Academy Award winner Alex Gibney (\"Taxi to the Dark Side\") looks at the world of the NHL enforcers and specifically the career of Chris \"Knuckles\" Nilan who helped the Montreal Canadiens win the Stanley Cup.",
|
569 |
-
"id": "44fc412246964b2393fa0035ff093a00"
|
570 |
-
},
|
571 |
-
"409350c111af4ba3a94c842b797ddb95": {
|
572 |
-
"label": false,
|
573 |
-
"text": "Two fishing fanatics get in trouble when their fishing boat gets stolen while on a trip.",
|
574 |
-
"id": "409350c111af4ba3a94c842b797ddb95"
|
575 |
-
},
|
576 |
-
"d48d8f3b5a524ecea69bae718d1f1513": {
|
577 |
-
"label": false,
|
578 |
-
"text": "A willful young boy follows his just as obstinate grandmother in a journey across Iraq, determined to discover the fate of her missing son, Ahmed's father, who never returned from war.",
|
579 |
-
"id": "d48d8f3b5a524ecea69bae718d1f1513"
|
580 |
-
},
|
581 |
-
"283e96de5b474240a044c50dbc2551fb": {
|
582 |
-
"label": false,
|
583 |
-
"text": "A group of people are sitting in a theatre watching a movie when one realises that the woman on the screen is her. (IMDb)",
|
584 |
-
"id": "283e96de5b474240a044c50dbc2551fb"
|
585 |
-
},
|
586 |
-
"516d0f2f3a854a97a87c64db19a89fac": {
|
587 |
-
"label": false,
|
588 |
-
"text": "of the fake prediction. Fantastic swashbuckling adventures in a 18th century setting, with a light criticism of the war and the mighty.",
|
589 |
-
"id": "516d0f2f3a854a97a87c64db19a89fac"
|
590 |
-
},
|
591 |
-
"c2f55710669b40aa937625fe0ab04065": {
|
592 |
-
"label": false,
|
593 |
-
"text": "famous for his reputation as a Don Juan, to seduce C\u00e9cile and emotionally destroy her. While on his mission, Valmont gets sidetracked when he goes to visit his aunt and falls for Madame Tourvel, a virtuous, married woman who knows of his womanizing ways, but that only makes the challenge more exciting to Valmont. Together, Madame de Merteuil and Valmont make a dangerous team and they will stop at nothing when it comes to matters of the heart.",
|
594 |
-
"id": "c2f55710669b40aa937625fe0ab04065"
|
595 |
-
},
|
596 |
-
"ba0261b2ee3244d29bb3a8c6d77195a6": {
|
597 |
-
"label": false,
|
598 |
-
"text": "sees the formula graph, the chip calculates the formula, able to \"survive\" thanks to its connection to Edit, develops a parallel personality and affords her abilities greater than she ever imagined...",
|
599 |
-
"id": "ba0261b2ee3244d29bb3a8c6d77195a6"
|
600 |
-
},
|
601 |
-
"5e724fbde8ee44d9a8fc87a6e6667f01": {
|
602 |
-
"label": false,
|
603 |
-
"text": "telling the story about people who despite all obstacles strive for their goal.",
|
604 |
-
"id": "5e724fbde8ee44d9a8fc87a6e6667f01"
|
605 |
-
},
|
606 |
-
"557eba5ebfc9467a9d88688afed41354": {
|
607 |
-
"label": false,
|
608 |
-
"text": "A young playboy who learns he has one month until he becomes infertile sets out to procreate as much as possible.",
|
609 |
-
"id": "557eba5ebfc9467a9d88688afed41354"
|
610 |
-
},
|
611 |
-
"aa20e22fbe96487d8ee1223a6ef4da0b": {
|
612 |
-
"label": false,
|
613 |
-
"text": "Set in modern times, Alex finds King Arthur's sword Excalibur and must prove himself worthy of it.",
|
614 |
-
"id": "aa20e22fbe96487d8ee1223a6ef4da0b"
|
615 |
-
},
|
616 |
-
"bea56d34f6df408c9ec9653b17a90a93": {
|
617 |
-
"label": false,
|
618 |
-
"text": "Kostis is a 40-year-old doctor that finds himself in the small island of Antiparos, in order to take over the local clinic. His whole life and routine will turn upside down when he meets an international group of young and beautiful tourists and he falls in love with Anna, a 19-year-old goddess.",
|
619 |
-
"id": "bea56d34f6df408c9ec9653b17a90a93"
|
620 |
-
},
|
621 |
-
"e61a3251720d425c9f4770cb4b11d2d9": {
|
622 |
-
"label": false,
|
623 |
-
"text": "Friends on a weekend excursion take a path into a forest that leads to death and destruction.",
|
624 |
-
"id": "e61a3251720d425c9f4770cb4b11d2d9"
|
625 |
-
},
|
626 |
-
"5471008376cf44518f2ff1f67f057c08": {
|
627 |
-
"label": false,
|
628 |
-
"text": "Mr Bournelis suggested all 30 lineal metres of blockwork should be removed and replaced, which would require removing and reinstalling the fence. The total cost of his suggested method of rectification was said to be $14,650 for each unit, giving a total cost of rectification of $29,300.",
|
629 |
-
"id": "5471008376cf44518f2ff1f67f057c08"
|
630 |
-
}
|
631 |
-
},
|
632 |
-
"version": 27,
|
633 |
-
"description": "Negative sentiment"
|
634 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/concept/lilac/negative-sentiment/sbert.pkl
DELETED
Binary file (106 kB)
|
|
data/concept/lilac/positive-sentiment/concept.json
DELETED
@@ -1,564 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"namespace": "lilac",
|
3 |
-
"concept_name": "positive-sentiment",
|
4 |
-
"type": "text",
|
5 |
-
"data": {
|
6 |
-
"0": {
|
7 |
-
"label": false,
|
8 |
-
"text": "Starting To Be Annoyed By Becky...: I'm not sure why I keep reading these books, but I guess it's because I've read the first two so I'll keep reading the rest of the books. In the first book, I really found it amusing. I was a little annoyed by the fact that Becky couldn't stop spending, but then again that's why she is called a Shopaholic. In the second book, I felt more of the same it was just magniifed more. Now in the third book, I'm just down right annoyed by Becky Bloomwood. In this book, she wasn't going on crazy shopping sprees, just planning two different weddings because she was afraid to tell each person and because I feel she's really selfish. Still, I read the book because I wanted to see how she could get herself out of another situation. I will say that I love her friends Suze and Danny, her client Laurel and her husband Luke. Maybe that's why I keep reading. I will read the next book, but I'm sure I'll be just as annoyed when I'm done.",
|
9 |
-
"id": "0"
|
10 |
-
},
|
11 |
-
"1": {
|
12 |
-
"label": false,
|
13 |
-
"text": "the cover is fine - the pool is horrible: The entire pool was horrible. The cover was fine once we got it on, but we finally got rid of the pool after 2 weeks because it was so hard to set up and keep clean.",
|
14 |
-
"id": "1"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"label": true,
|
18 |
-
"text": "Good album, not their best.: This album is probably the most marketable and radio friendly of all of dashboard's albums. For the peripheral listener it may be the right one to get to introduce you to this band. But as a Dashboard fan of 5 or so years I truly hope they return to their original sound for their next work. Not for the listen-ability but for the show. To this day the fans react best to the songs from \"Places\" or \"A Mark, A Mission.\" I recommend this album to everyone but I also recommend any and all of their other work.",
|
19 |
-
"id": "2"
|
20 |
-
},
|
21 |
-
"3": {
|
22 |
-
"label": false,
|
23 |
-
"text": "This is a horror novel, right?: Never one to pass up any vampire novel, I purchased Sips because the description seemed interesting. Vampires, Marquis de Sade, fetishism, yada yada yada. If this is a comedy, I give it 4 stars; however, I'll give it 1 star as a horror novel. Sade was rather boring; I would think a character as intense and multi-faceted as the Marquis de Sade would make for a more interesting vampire. The writing style isn't too bad, but overall I found the characters to be mildly amusing at best. The plot was thin, the end was anti-climactic, and the vampires were not very frightening. The book had little suspense, and it leaves a mile-wide opening for a sequel at the conclusion. I would, however, like to see something more of the vampire mutants lurking in the graveyard. They were the most riveting of any of the characters.",
|
24 |
-
"id": "3"
|
25 |
-
},
|
26 |
-
"4": {
|
27 |
-
"label": true,
|
28 |
-
"text": "Superb mix of global non secular musical denominations: I first heard Ms. Pook's music on the \"Eyes Wide Shut\" soundtrack (the masquerade ball scene) and was blown away; if ever there was a necessity for music to permeate a scene in a film this was it. She incorporates a blend of the traditional songs from faiths across continents and mixes them, for lack of a better comparison than similar to your quintessential raver d.j. (though these are better and definitively more original :) \"Oppenheimer\" is my favorite, and if you let the last track run for a few minutes a portion of the song will play once more. I can't wait to hear more of her stuff - these hymns are awesome.",
|
29 |
-
"id": "4"
|
30 |
-
},
|
31 |
-
"5": {
|
32 |
-
"label": true,
|
33 |
-
"text": "A moving and suspenseful story!: For anyone familiar with the occult, this book is likely to raise hairs on the back of your neck as you read. Even if you're not, the storyline is suspenseful and fascinating, and the characters evoke great sympathy and admiration. An excellent read.",
|
34 |
-
"id": "5"
|
35 |
-
},
|
36 |
-
"6": {
|
37 |
-
"label": true,
|
38 |
-
"text": "Simple & Easy to Use - A Practical approach to eating out: This guide is extremely to use. It provides sample menus that you'd see at Chinese, Indian and Thai restaurants. Then you are provided with descriptions of each dish and how it is prepared and the ingredients used. From there you are provided with specific considerations as to how the preparation or ingredient list may affect you if you have Gluten or Allergen issues.This book is the size of a passport and very organized and well written. The Chinese, Indian and Thai Cuisine Passport is perfect for making choices while traveling, or while dining at your favorite local restaurant.",
|
39 |
-
"id": "6"
|
40 |
-
},
|
41 |
-
"7": {
|
42 |
-
"label": false,
|
43 |
-
"text": "Being Fair....I am a fan of hers: and I really enjoyed her previous works, more than I could have ever imagined, but this record is horrible. The songs are trite, the lyrics are incredibly boring, indulgent and immature. The music is pop staple, with forgetable melodies and repetative chorus lines, I feel as if the studio wrote the entire album for her while she was sleeping, this just doesn't speak to any of her previous works at all. This album fits on the same shelf with a Nickelodeon-themed CD. Instead of heading in the direction of an artist like Alanis Morrisette, she is going backward and joining the ranks of Hannah Montana and the Naked Brothers Band. She is a great artist and her first two records are amazing. She is better than this CD and I am looking forward to her next effort.",
|
44 |
-
"id": "7"
|
45 |
-
},
|
46 |
-
"8": {
|
47 |
-
"label": false,
|
48 |
-
"text": "Sucked: I thought the DVD sucked tremendously. It was very boring and if I could, I would return it for a refund. There was only one \"small\" clip of Dylan himself. I'm very disappointed.",
|
49 |
-
"id": "8"
|
50 |
-
},
|
51 |
-
"9": {
|
52 |
-
"label": true,
|
53 |
-
"text": "Excellent product: Easy to install. If you have a newer furnace you probably do not need the swail switch as the HE220A comes with a Humistat which can be connected to the furnace. They recommend the Honeywell 32005847-001 Installation Kit, Bypass which is a little pricey and you can probably buy the pieces of this kit cheaper individually from Home Depot or Lowes or ACO as well as the filters.",
|
54 |
-
"id": "9"
|
55 |
-
},
|
56 |
-
"10": {
|
57 |
-
"label": true,
|
58 |
-
"text": "Very happy.: I am very happy with this trashcan. I was unable to find one in the stores to fit the space in my cabinet, but this one does the job. It is very sturdy and looks like it will put up with years of use.",
|
59 |
-
"id": "10"
|
60 |
-
},
|
61 |
-
"11": {
|
62 |
-
"label": false,
|
63 |
-
"text": "These aren't Throughbreds!: This makes me so mad. All these new authors are coming and changing the series. Nothings the same anymore and the plots are repeditive. Don't even bother reading these books until #32 these are like a different series. I don't know excactly what's happing but these new authors suck!",
|
64 |
-
"id": "11"
|
65 |
-
},
|
66 |
-
"12": {
|
67 |
-
"label": false,
|
68 |
-
"text": "Large and slow are a bad combination.: I bought this TV and returned it a week later, because it blurred so badly with motion that sports were unwatchable. I ended up buying a smaller Sony XBR4, and I have none of the issues (plus the picture is far, far better).This has nothing to do with 60 vs 120Hz. That is more important for DVDs and Blu-Ray signals that are 24fps (which doesn't divide evenly into 60 but does for 120). The LT52133 has an 8ms response time, which is extremely slow. A decent LCD should be 5 or lower.If you want an LCD, choose speed and quality over size. If you want size and quality but want to spend less, buy a plasma. Don't buy a big, cheap, slow LCD!I gave it 2 stars because I like the interface and remote.",
|
69 |
-
"id": "12"
|
70 |
-
},
|
71 |
-
"13": {
|
72 |
-
"label": false,
|
73 |
-
"text": "Skip it: This movie is very poorly written and the result is not distressing, just lame. The actors do their best but from very early on it is obvious that the material gives them nothing to work with. Fans of Colin Firth will experience a certain dim level of enjoyment. Minnie Driver is a treat but her character is no better written than the others. Vermont locations are worth something. With one or two moments of exception it's neither comedic nor romantic.",
|
74 |
-
"id": "13"
|
75 |
-
},
|
76 |
-
"14": {
|
77 |
-
"label": true,
|
78 |
-
"text": "Belive it i went to the concert?: hi everyone let me tell you i went to the concert i was amazed with what i saw cher was awsome i tell you buy the dvd. as i sat in front of the stage cher was doing a great job to us the she is living proof . So i urge you to buy it?",
|
79 |
-
"id": "14"
|
80 |
-
},
|
81 |
-
"15": {
|
82 |
-
"label": true,
|
83 |
-
"text": "Vale la pena.: En este libro se narra de una forma muy interesante la vida de una familia en particular. Lo que mas me gusto de este libro fue la manera en que la autora describe a lo largo del libro las personalidades de los sujetos envueltos en la novela; que vienen a ser muy distintos y extremos, lo cual, intensifica el drama... Definitivamente es un buen libro y lo recomiendo a todos.",
|
84 |
-
"id": "15"
|
85 |
-
},
|
86 |
-
"16": {
|
87 |
-
"label": true,
|
88 |
-
"text": "Nummie Children's story: I ordered this book for our grandchildren. Two boys 5 & 3 and a 4 month old girl. All love the story. The mouse is determined.",
|
89 |
-
"id": "16"
|
90 |
-
},
|
91 |
-
"17": {
|
92 |
-
"label": false,
|
93 |
-
"text": "Seem to be alone on this one: Looking at the other reviews, I seem to be the only one that was disappointed with this book. The content is too babyish in most of it for older tweens and the more \"grown up\" content would be over a younger tween's head. I had a quick read through and with every paged turned, I thought duh. I'll be looking around for another book shortly.",
|
94 |
-
"id": "17"
|
95 |
-
},
|
96 |
-
"18": {
|
97 |
-
"label": true,
|
98 |
-
"text": "Best yet: by far the best EA game yet. I especially like the easy controls and kick - a graphics. the playbook is extremely accurate and detailed. Also the fight songs and cheerleaders were a nice touch. this is an excellent game and worth checking out.",
|
99 |
-
"id": "18"
|
100 |
-
},
|
101 |
-
"19": {
|
102 |
-
"label": false,
|
103 |
-
"text": "washed out: A bit like Simply Reds version of the Valentine bros hit \"Moneys too tight to mention\" - this cover version has nothing of the driving energy that characterised the original recording.",
|
104 |
-
"id": "19"
|
105 |
-
},
|
106 |
-
"20": {
|
107 |
-
"label": true,
|
108 |
-
"text": "great water bottle: I love this bottle it is great. I like knowing it is non toxic and it just works very well. You can have it full and lay it down and it doesn't leak at all.",
|
109 |
-
"id": "20"
|
110 |
-
},
|
111 |
-
"21": {
|
112 |
-
"label": true,
|
113 |
-
"text": "Nice goggles: I am pretty happy with these goggles. They work well during swim workouts in the pool. I do notice a little bit of fogging from time to time. I had hoped to wear them during an upcoming triathlon, but based on a few instances where they slipped a little in the pool I am concerned that they won't be secure enough. I will keep using them in the pool, but will likely get different ones for open water races.",
|
114 |
-
"id": "21"
|
115 |
-
},
|
116 |
-
"22": {
|
117 |
-
"label": false,
|
118 |
-
"text": "aaahhh nnnoooooo!: Hopefully the last film in one of the worst horror trilogys ever made. This series pretty much ruined the horror film for years to come, for one its too self aware, thats incredibley annoying, second everyone acts like they are on Friends or some sitcom. The acting is just plain bad and unconvincing. Now the gore, if you're going with material this weak you should load it up with disgusting violence, is there any in the Scream series? No.Everyone went to see this movie just to see who THE KILLER is. This movie sets low standards to be met, you expect alot of people to die, one shock, then we find out who the killer is, then you go home. Every horror film being made today is like that, there's nothing new or exciting or risk taking, its the same stuff over and over and people are laping it up like dog food.This film is what you've come to expect, bad acting, some people die and we eventually find out who the killer is and all is merry and well. Pathetic.",
|
119 |
-
"id": "22"
|
120 |
-
},
|
121 |
-
"23": {
|
122 |
-
"label": true,
|
123 |
-
"text": "A classic of its kind: This movie is a classic of its kind and much better that a lot of movies, that followed. It is not one of the best, but it still deserves five stars...",
|
124 |
-
"id": "23"
|
125 |
-
},
|
126 |
-
"24": {
|
127 |
-
"label": false,
|
128 |
-
"text": "Nice suite, but Virtual PC 7 disappoints on my G5: I purchased the upgrade since I'd already bought both Office v.X and Virtual PC 6.1 last year.The biggest letdown is that Microsoft's promised support for the G5 is nearly non-existent. I have a dual processor G5 with an ATI Radeon 9800 card (Apple), and after trying to install Virtual PC 7 three times, I cannot get a VM to work. It did install (and work) flawlessly on my G4 Powerbook. Googling for reviews finds it's very hit or miss, but if (when) it misses, you'll regret investing the extra $$$ in an immature product.",
|
129 |
-
"id": "24"
|
130 |
-
},
|
131 |
-
"25": {
|
132 |
-
"label": false,
|
133 |
-
"text": "Okay player, don't expect a miracle: I bought this DVD player at Circuit City earlier this yr for about a $100. I hooked it up to a 47\" Vizio LCD (which by the way has an awesome picture) using a HDMI cable. After fine tuning this product, I was very, very, very diasppointed. The picture was very \"grainy\" (lots of pixels). I have a $35 DVD player that only utilizes an s-video cable that produces a much more crisp picture. Be warned, the picture stinks.",
|
134 |
-
"id": "25"
|
135 |
-
},
|
136 |
-
"26": {
|
137 |
-
"label": true,
|
138 |
-
"text": "A revelation of the science of consciousness evolution and all natural growth: Here is a readable and fascinating account of the development of the new science of chaos theory, the only body of ideas that describes how the natural world as experienced by human beings emerges out of basic quantum processes. The different explorers and innovators of the new science are introduced in a personable way that will enchant the interested reader.",
|
139 |
-
"id": "26"
|
140 |
-
},
|
141 |
-
"27": {
|
142 |
-
"label": false,
|
143 |
-
"text": "Don't say that I didn't warn ya' !: I'm absolutely convinced that Delbert McClinton had no controlover the release of this CD. I rated it 1 star simplybecause there is no 0 star rating ! In actuality , I am not certain that the vocalist on this recording IS Delbert McClinton. Only on the Mr. Pitiful track is there any similarity at all to Delbert's voice. This is the perfect CD for someone with money to burn who would like to have a recording of a 1960's garage band recorded in a garage and who should be working in a garage ! Delbert fans...run fast and run far away from this ! END",
|
144 |
-
"id": "27"
|
145 |
-
},
|
146 |
-
"28": {
|
147 |
-
"label": false,
|
148 |
-
"text": "This item is not available: I ordered this unit on February 7th. Every time I checked back on the status of the order, it read \"not shipped\" and the estimated shipping date got moved out. I really don't think this unit is avaialble from the company anytime soon. I cancelled the order.",
|
149 |
-
"id": "28"
|
150 |
-
},
|
151 |
-
"29": {
|
152 |
-
"label": false,
|
153 |
-
"text": "I used to like ABBA...: I used to like ABBA, until I saw Mama Mia! A horribly disjointed musical, where songs feel contrived to fit into the story; a story that doesn't seem to come together. Individual songs are usually done alright, but don't segue from one to another very well.The cast butchered several of the songs, but especially S.O.S, Take A Chance On Me, and anything where Pierce Brosnan sang. On a side note, I also counted at least two violations of Chekov's Gun. And finally, I think it has a bad moral message. Which you only recognize if you manage to sit through the whole thing.If there is justice in the world, cast members without established careers won't get to have them as punishment for the worst movies I've seen since The Talented Mr. Ripley.",
|
154 |
-
"id": "29"
|
155 |
-
},
|
156 |
-
"30": {
|
157 |
-
"label": false,
|
158 |
-
"text": "A complete disaster!: If you're like me, you probably wanted to check out this movie because it sounded like it really could be an excellent supernatural Gothic horror tale full of goblins and wicked things alike. Well, don't make the same mistake I did and actually watch it. It's horrible. Terrible. An honest to goodness waste of film. The acting is wretched, the film quality is rotten (it actually looks twenty years older than it is), and the plot is thin, weak, and does not give you what it's supposed to. The only reason I bothered to give this film 1 star is because of Alexis Arquette -- he's great looking, but should have left this film out of his career.",
|
159 |
-
"id": "30"
|
160 |
-
},
|
161 |
-
"31": {
|
162 |
-
"label": true,
|
163 |
-
"text": "beautiful detail: I just purchased these Dover COloring Books for my mother and she loves them. The detail is out of this world and the variety of colors you can use are only limited by your inagination. HIGHLY RECOMMENDED!",
|
164 |
-
"id": "31"
|
165 |
-
},
|
166 |
-
"32": {
|
167 |
-
"label": false,
|
168 |
-
"text": "Very disappointed: I looked forward to getting this movie as I had heard many good things about it but it was nothing like I had imagined or been led to believe. There is very little actual history in it or real Christian experience except for the background because the main focus is a soap opera style romance and caricature figures. I agree with the reviewer who described it as a mixture of \"tawdry Hollywood sex\" somehow interspersed with a vague nod to Christianity. The only decent scene was the arena scene where the Christians are going to their deaths singing hymns - but that's not enough to make it a great or even a good movie. Not personally to my taste anyway.",
|
169 |
-
"id": "32"
|
170 |
-
},
|
171 |
-
"33": {
|
172 |
-
"label": false,
|
173 |
-
"text": "Unreliable minikit: I bought this minikit because it got good reviews and it would be perfect for my purposes. However it switches on and off whenever it wants, it looses contact with the phone. Very often the on/off button works only in a horizontal position (?) I use a Treo 650, which is on the compatible phone list. When I contacted Parrot, they said it wasn't (?) At last I opened the unit, but there are no moving parts inside except the micro switches. It is giving me a headache, so I will go searching for an alternative.",
|
174 |
-
"id": "33"
|
175 |
-
},
|
176 |
-
"34": {
|
177 |
-
"label": true,
|
178 |
-
"text": "A Christmas Classic!: This is surely one of the best classical Christmas recordings available. Don't buy the older version, as the quality of this recording is excellent. This is one of those \"Every Christmas - Can't have Christmas without\" recordings.",
|
179 |
-
"id": "34"
|
180 |
-
},
|
181 |
-
"35": {
|
182 |
-
"label": false,
|
183 |
-
"text": "too narrow: These were the narrowest pair of D size shoes I have ever tried on. I don't care how nice a shoe looks. If it don't fit it just don't fit.",
|
184 |
-
"id": "35"
|
185 |
-
},
|
186 |
-
"36": {
|
187 |
-
"label": false,
|
188 |
-
"text": "Lack of extension: This earphones lack a descent extension cord. ITs very small cable, but its of good quality. Sadly, cord its too short, and the extension is useless.",
|
189 |
-
"id": "36"
|
190 |
-
},
|
191 |
-
"37": {
|
192 |
-
"label": true,
|
193 |
-
"text": "Easy-Reading: This is the 3rd Southern Sisters Mystery I've read. They're easy, fast and funny murder mysteries, with lots of cute family stories intertwined in the intrigue.",
|
194 |
-
"id": "37"
|
195 |
-
},
|
196 |
-
"38": {
|
197 |
-
"label": false,
|
198 |
-
"text": "it'd be great if it worked like it was supposed to: for the first 30 seconds it was lovely, but i believe that either the motor isn't powerful enough to keep the shaft rotating smoothly or 3 AA batteries just don't provide enough juice for the motor to work more than 30 seconds. it was a nice idea, but i'm rather dissapointed. the jelly material is somewhat difficult to maintain also. i think if it were hooked up to a larger battery pack it'd be WONDERFUL... which i think i may have a macgyver friend with a knack for electronics attempt to do for me.",
|
199 |
-
"id": "38"
|
200 |
-
},
|
201 |
-
"39": {
|
202 |
-
"label": true,
|
203 |
-
"text": "Not Hornby's best but still good: I loved About a Boy and really, really loved the sardonic wit of High Fidelity. About a Boy is much deeper but just as cynical. Maybe even more so. The characters are richly drawn and just complex enough to keep the reader wanting more. Good read, but best to take some time with this one. Not recommended for a summer beach read.",
|
204 |
-
"id": "39"
|
205 |
-
},
|
206 |
-
"40": {
|
207 |
-
"label": false,
|
208 |
-
"text": "A Disappointment: As with most Taunton Press publications, the illustrations and photographs in this book are spectacular and the organization and layout is superb. Nonetheless, I found this book disappointing. It lacks both depth and breadth. I had hoped for a detailed review of wood joinery including some of the more unusual joinery found in Japanese woodworking. This book, however, is targeted more toward the beginner. Even so, it does not cover the details and \"tricks\" of even the most basic techniques in sufficient detail to allow beginners to easily reproduce them. Consequently, it is unclear who this book was written for - not the beginner as it lacks depth, and not the advanced woodworker as it lacks breadth. Far more effort appears to have been put into appearance and organization than in content.",
|
209 |
-
"id": "40"
|
210 |
-
},
|
211 |
-
"41": {
|
212 |
-
"label": false,
|
213 |
-
"text": "Horrible. Don't do it!: Great price for the item when a 6' one of these at Best Buy is $20. Thing is, the one from Best Buy fits in the outlet and stays there. This cord fits very loose and does not connect. I bought 2 of them, neither did what they were suppose to.As much as I hate to say it, but, buy the more expensive one. At least it works.",
|
214 |
-
"id": "41"
|
215 |
-
},
|
216 |
-
"42": {
|
217 |
-
"label": true,
|
218 |
-
"text": "Given as a gift...: Given to my best friend as a gift. She loves it. Her fiance enjoys making coffee for her in the mornings. :)",
|
219 |
-
"id": "42"
|
220 |
-
},
|
221 |
-
"43": {
|
222 |
-
"label": true,
|
223 |
-
"text": "Love the ring.: This is a nice ring. I was worried it out be thin and cheap looking, but it's not. It's a very pretty stylish ring. Go for it.",
|
224 |
-
"id": "43"
|
225 |
-
},
|
226 |
-
"44": {
|
227 |
-
"label": false,
|
228 |
-
"text": "Beautiful writing Marred by One-Note Characterizations: How could Kingsolver have ruined her book with such an obvious error? Nathan is a strident paper doll that flattens the whole story. Just as bad, the author has all the narrators using the same ironic tone to decribe him, deadening their voices as well. At the same time, Kingsolver doesn't have the guts to show him doing something trully terrible. I don't trust an author who can't let the reader make up his own mind, and as a consequence I couldn't trust her views about ANYTHING in the story. I'm giving this two stars for her descriptions of the African landscape, and that is all.",
|
229 |
-
"id": "44"
|
230 |
-
},
|
231 |
-
"45": {
|
232 |
-
"label": false,
|
233 |
-
"text": "Much worse than any cordless phone I've ever had: This phone cuts out only 2 rooms away from the base station. There is static noise, and callers on the other end complain about sound quality. I can't go into the garden, which used to be no problem with my old 900 MHz phone.",
|
234 |
-
"id": "45"
|
235 |
-
},
|
236 |
-
"46": {
|
237 |
-
"label": false,
|
238 |
-
"text": "Waste of time & money: The first Hangover was not too bad, this one was just terrible. The acting is bad, the script is bad, everything about this movie was just bad. Do yourself a favor, don't buy this movie as it is a total waste of time and money.",
|
239 |
-
"id": "46"
|
240 |
-
},
|
241 |
-
"47": {
|
242 |
-
"label": false,
|
243 |
-
"text": "Did Not Work For Me!: Impressive You Tube Video (Like a Sci-Fi Fantasy). In reality it's a high speed Easy Out so unsurprisingly it broke faster than an Easy out. This product did not work for me. The drill part did not drlil, the puller part did not pull. It was a total zero.",
|
244 |
-
"id": "47"
|
245 |
-
},
|
246 |
-
"48": {
|
247 |
-
"label": true,
|
248 |
-
"text": "Excellent book, long overdue.: From a very long time women were told that looking good was of utmost importance. This was without regard to health or fitness and how age affected these parameters. Witness the whalebone and other types of corsets, the spike heeled shoes and the numerous weight loss programmes on the market (some of which are downright dangerous). Now there is a book, backed by solid research, that allows women of all ages to remain fit and healthy for a lifetime. I am certainly going to recommend this book to all the women I know.Bentley Norville",
|
249 |
-
"id": "48"
|
250 |
-
},
|
251 |
-
"49": {
|
252 |
-
"label": false,
|
253 |
-
"text": "not an all star: Not a practical guide in this collecting age. Does NOT have a comprehensive list; meaning it does NOT cover all manufacturers and, more importantly, for the ones it does, only provides listings of the base set. That means no insert or variation pricing whatsoever. Also, no oddball or minor league issues are listed. Generally speaking, unless you are collecting base sets prior to the advent of inserts and alternate versions of the base set, this guide is fairly useless.",
|
254 |
-
"id": "49"
|
255 |
-
},
|
256 |
-
"50": {
|
257 |
-
"label": false,
|
258 |
-
"text": "Again, second rate city, third rate writer: Just another example of Mr. Lindberg's pitiful attempt at exhibiting a strong expertise on a subject with which he is clearly obsessed. Don't waste your time with this book, either. It is poorly written and fails to engage the reader. You might consider using this book and the first book he wrote on the same subject, as a pair of bookends. That is about all they are worth.",
|
259 |
-
"id": "50"
|
260 |
-
},
|
261 |
-
"51": {
|
262 |
-
"label": true,
|
263 |
-
"text": "Reality: No one should need to convince you to buy this book, you should just do it! It's so well written and worded and brings you right to the heart of a sexual reality that most people like to pretend doesn't really live and breath in their fair cities. I never again want to hear someone bad mouth a working girl for what she does. I will and do now however look at men with a curious eye wondering if they are depraved peep show window lickers :)",
|
264 |
-
"id": "51"
|
265 |
-
},
|
266 |
-
"52": {
|
267 |
-
"label": false,
|
268 |
-
"text": "Bummer: Visual effects and Battle footage were great...the other 85% of the movie was just lousy fluff...",
|
269 |
-
"id": "52"
|
270 |
-
},
|
271 |
-
"53": {
|
272 |
-
"label": true,
|
273 |
-
"text": "The spark of idependence: Filled with the independent spark that made us all love life at one point or another. A fun, introspective and nonsensical movie that sticks with you.",
|
274 |
-
"id": "53"
|
275 |
-
},
|
276 |
-
"54": {
|
277 |
-
"label": true,
|
278 |
-
"text": "What I expected from Mirman's website. Funny. Funny. Russian.: lol, gotta love Eugene. Even when his audience doesn't initially laugh, he gets in a good zinger at himself and they laugh at that. He's witty without being condescending, and uncomplicated without seeing contrived. However, if you're not a fan of irreverant humor, this may not be for you.",
|
279 |
-
"id": "54"
|
280 |
-
},
|
281 |
-
"55": {
|
282 |
-
"label": false,
|
283 |
-
"text": "Do not...repeat...do not bother!: It is not often that I offer a negative review but this compilation while attractive does not deliver at all.The foot massage gizmo is awkward and uncomfortable.The pumice stone leaves rough splinter like skin.The foot scrub doesn't reall scrub.The rotary action tool has five heads, none of which work well and you must hold the switch in place or it turns off. It is cumbersome and ineffective.The one star was initially given for a foot brush (which later lost its bristles very easily as I update the review) and a sweet smelling foot repair balm.Don't waist your money. Soak your feet and invest in an inexpensive German Titania file, smooth and coarser side, or a like product. It will last for years.",
|
284 |
-
"id": "55"
|
285 |
-
},
|
286 |
-
"56": {
|
287 |
-
"label": false,
|
288 |
-
"text": "Not Sandra's Best: Ms. Brown has written better romance novels. Don't give up on her if this was your first Sandra book.The feeble female lead struggles with a 15-year crush that walks back into her life. The smug male lead acts like a jerk through most of the novel. The romance scenes grapple to muster up passion but fall short. Both of the main characters bothered me; my favorite character was the 17-year old.A quick read...about 4 hours (with interruptions) for me...but probably not worth it.",
|
289 |
-
"id": "56"
|
290 |
-
},
|
291 |
-
"57": {
|
292 |
-
"label": true,
|
293 |
-
"text": "Impressed: Lots-O-Fun. Wood and glass toys are high quality and are a good fall back for the kids to play with they are \"bored\". Would buy again.",
|
294 |
-
"id": "57"
|
295 |
-
},
|
296 |
-
"58": {
|
297 |
-
"label": false,
|
298 |
-
"text": "Light turned on by itself 3 times: The installation was easy. I used it for a week, everything worked fine, EXCEPT the light it connected to turned on by itself 3 times so far, with no one near to either one of the switch. Not sure whether it is a defective unit, or this product is too sensitive to noise. I'm returning this product and will just install a regular switch instead.",
|
299 |
-
"id": "58"
|
300 |
-
},
|
301 |
-
"59": {
|
302 |
-
"label": true,
|
303 |
-
"text": "good battery: I feel kind of silly writing a review for a battery, but have to say that these last a LONG time. Work very well.",
|
304 |
-
"id": "59"
|
305 |
-
},
|
306 |
-
"60": {
|
307 |
-
"label": true,
|
308 |
-
"text": "Even a Woman finds it funny: Yes, even a woman finds \"Married to Mommy\" funny. The book gets you laughing aloud when it is trying to make fun of \"Mommies\". The truth is that it really is making fun of the stupidity of men and their simple basic needs of sex, getting out of work, and beer. Of course, the truth is always funny.A definite MUST for any woman, married or not. We will now know all the secret tricks the men try to use on us.By the way, I am NOT a MOMMY!",
|
309 |
-
"id": "60"
|
310 |
-
},
|
311 |
-
"61": {
|
312 |
-
"label": true,
|
313 |
-
"text": "Gungrave...not quite what you might expect: Those thinking this is another version of Trigun will be disappointed. Gungrave is actually a lot deeper and more complex. The lead is short on dialouge, but the story has more depth and character development than most anime. The first DVD is more about the main character's past than about the reanimated killing machine he's become, but it definitely leaves you wanting more.",
|
314 |
-
"id": "61"
|
315 |
-
},
|
316 |
-
"62": {
|
317 |
-
"label": true,
|
318 |
-
"text": "Error in product description: It's great in every way. However, if you'd prefer a digital tuner (as I do), then you might need to look further. The product description boasts a digital AM/FM tuner, but it's disappointingly an analog AM/FM tuner.Overall - especially for the price - I think it's pretty good.",
|
319 |
-
"id": "62"
|
320 |
-
},
|
321 |
-
"63": {
|
322 |
-
"label": true,
|
323 |
-
"text": "good phone but not as user friendly as it could be: Battery life is very good. Phone has good range. My only complaint is it's to involved to get your message from the handset.",
|
324 |
-
"id": "63"
|
325 |
-
},
|
326 |
-
"64": {
|
327 |
-
"label": false,
|
328 |
-
"text": "Big waste of money (and space in my house!): My 5 year old son wanted this so bad, but when we got it for him, there were so many pieces to put together that didn't fit together well, he never played with it. It just sits on our floor in many pieces taking up toy space! What a waste!",
|
329 |
-
"id": "64"
|
330 |
-
},
|
331 |
-
"65": {
|
332 |
-
"label": true,
|
333 |
-
"text": "Don't want to take it off: Very satisfied with an earlier purchase of this Bali bra model, I was just as pleased with the new one. Very comfortable, well made and a good neutral color. It will be my next choice, too.",
|
334 |
-
"id": "65"
|
335 |
-
},
|
336 |
-
"66": {
|
337 |
-
"label": true,
|
338 |
-
"text": "Fantastico: If anybody who's into rock music is ever looking for a band to keep you on your toes, this is the band. I've been a fan for 10 years now, and no album has ever sounded like any of their previous albums. This disc is fantastic with such a variety of styles, as are the previous releases, even back to the Rainbow Butt Monkey days.",
|
339 |
-
"id": "66"
|
340 |
-
},
|
341 |
-
"67": {
|
342 |
-
"label": false,
|
343 |
-
"text": "too much visual: There are far too much designs, visuals, colors, etc in the book - this is highly distracting, as TV screen can be...By way of example (among so many...), what is the use of colors with the three squares of the Pyth. theorem???? this is as useless as writting 2+3=5 with 2 in blue, 3 in red and 5 in yellow...I wish I had purchased the 2nd edition, which according to reviews was closer to what I was looking for.",
|
344 |
-
"id": "67"
|
345 |
-
},
|
346 |
-
"68": {
|
347 |
-
"label": true,
|
348 |
-
"text": "Aretha's First Arista Release Showed Pleasures to Come: After a long and musically satisfying career with Atlantic, Aretha severed her ties with that company and moved under the wing of Arista's Clive Davis. With the start of the 1980's, Aretha was looking for new territory to conquer and almost succeeded with this mixed bag.\"United Together\" is a fine tune that benefits from beautiful orchestral arrangement that is matched by Aretha's superb vocal instrument. The remake of \"Can't Turn You Loose\" allows Aretha to show why she is the Queen of Soul\" for she really belts this one out. Another cover, that of the Doobies' \"What a Fool Believes,\" is an interesting interpretation. The final cut \"School Days\" appears to be \"autobiographical\" for every girl growing up in the fifties.Although not as strong as her Atlantic work, \"Aretha\" is still a suitable addition to the artist's discography.",
|
349 |
-
"id": "68"
|
350 |
-
},
|
351 |
-
"69": {
|
352 |
-
"label": false,
|
353 |
-
"text": "Misguided Purchase: The photo and description do not reflect the product. The screen panel kit I received was white. What a huge inconvenience during a time-crunch.",
|
354 |
-
"id": "69"
|
355 |
-
},
|
356 |
-
"70": {
|
357 |
-
"label": false,
|
358 |
-
"text": "Banacek: My husband and were looking forward to seeing this series.The first show was SO boring, we finally just quit watching it.Actually, we haven't gotten around to watching anymore. I guess we were afraid of a repeat.Maybe that was just once, I hope!",
|
359 |
-
"id": "70"
|
360 |
-
},
|
361 |
-
"71": {
|
362 |
-
"label": true,
|
363 |
-
"text": "JDT: Uncle Tupelo is without doubt one of the most under appreciated groups of the 90's. Anodyne, like each of the three albums that came before it, has everything that a remarkable recording requires: great songs, honest lyrics, and artists who really care about the music they are making. Like the best of Dylan and Springsteen, the songs are about real people with real troubles and joys. When you hear them you know they are coming from the heart. The songs contributed by Jay Farrar and Jeff Tweedy are easily differentiated by the voacls, music, and lyrics. What makes this record interesting is how well these unique sounds compliment each other. The union is seamless.",
|
364 |
-
"id": "71"
|
365 |
-
},
|
366 |
-
"72": {
|
367 |
-
"label": true,
|
368 |
-
"text": "Well Worth Reading: First a confession: Miriam Wasserman was my mother. However, she published several books, but this is the only one I really found useful. She walks the reader through the New York City school system and the attitudes of different groups involved in the system back in the 1960s. This includes parents, teachers and administrators. Her view is that the further away one got from parents and students, the more prestige one had. She meticulously describes the teachers' strike of 1968 against \"community control of schools\", a strike of which she is extremely critical. She explores the racism that was involved in this strike, including using quotes from striking teachers, etc. It should be emphasized that the author was pro-union all her life, so her views don't stem from an anti-union bias. The book also covers the high school student rebellion which coincided with and followed the strike.",
|
369 |
-
"id": "72"
|
370 |
-
},
|
371 |
-
"73": {
|
372 |
-
"label": true,
|
373 |
-
"text": "compact and loaded: I bought this phone after reading the cnet reviews and really liked it. It looks small and really compact. I like the camera pics at 2 mega pixel and bright flash. The mp3 player is crisp. The headset that comes along delvers amazing fM radio. I think my phone is not very loud and you have a problem when you are around a noisy crowd. I just bought this phone again for my cousin. He likes it too. Almost forgot the display is very good.",
|
374 |
-
"id": "73"
|
375 |
-
},
|
376 |
-
"74": {
|
377 |
-
"label": true,
|
378 |
-
"text": "Outstanding text!: Brooks/Cole should keep this text in their catalog for ages! It is well-written, examples are generally quite clear, vocabulary is introduced well, and the exercises develop real skills, rather than simply be busy-work. One of the best calculus books ever!",
|
379 |
-
"id": "74"
|
380 |
-
},
|
381 |
-
"75": {
|
382 |
-
"label": true,
|
383 |
-
"text": "Excel 2003 Bible: Very good source of information. I will most likely buy other books in this series.",
|
384 |
-
"id": "75"
|
385 |
-
},
|
386 |
-
"76": {
|
387 |
-
"label": true,
|
388 |
-
"text": "Tasting is Believing: Gluten-free breads used to have a gritty texture from the rice flour, and were too soft for sandwiches. Bette Hagman uses garbanzo/fava bean flour, sorghum flour, tapioca flour, and corn starch to create breads which have a similar texture to wheat flour breads, and the flavors of her breads are fabulous.My BF bought me this book and a great tasting beverage to drink it with. Since he knows I quit coffee recently, he's been really wonderful helping me in cope with my mood swings. S o y f e e is made from soy beans that is roasted just like coffee. I enjoy the taste and don't miss coffee one bit. Buy it online at www.s o y c o f fee.com.This is a 'must have' for anyone baking gluten-free. I think all of Bette Hagman's books are wonderful and a must for those with gluten intolerance.",
|
389 |
-
"id": "76"
|
390 |
-
},
|
391 |
-
"77": {
|
392 |
-
"label": true,
|
393 |
-
"text": "5 stars for the show, no stars for the \"Collector's Edition\": I was really looking forward to getting this Collector's Edition and see what extras were added. I knew it wasn't a lot - just a mini-book and a documentary - but I figured it would be packaged in a cool way.Wrong.As others have already mentioned, the Collector's Edition is *literally* theAvatar: The Last Airbender - The Complete Book 1 Collectionslipped into another cardboard box, with a little booklet and DVD in an envelope (not even a case!) wedged in. It's really disappointing; it would have been so easy to create a quality Collector's Edition but the studio couldn't be bothered, I guess.",
|
394 |
-
"id": "77"
|
395 |
-
},
|
396 |
-
"78": {
|
397 |
-
"label": true,
|
398 |
-
"text": "sula scottcampos: Sula, a book that talks about the issues of being a black women is a really good novel to read.One of the reasons I recommend it is because of its realism and its themes - death, sex, friendship and poverty.I also think that its characters are very good, its easy to identify with one or both of them. I really recommend this book to anyone who enjoys good literature.",
|
399 |
-
"id": "78"
|
400 |
-
},
|
401 |
-
"79": {
|
402 |
-
"label": true,
|
403 |
-
"text": "Fantastic! It's a must-have for girls!: I hated razor, tried shaving but it did not work for me. Shaving made the hair grows thicker and faster afterwards, plus the roots are impossible to be getting rid of. After reading the reviews, I ordered it to try, I used it for once and already fall in love with this. I used to use small tweezer to pluck out my leg's hair, in order to avoid the razor, it took me a few hours to do that but this super electronic tweezer works wonder! You won't see the black roots and I have smooth and silkly legs in 20 mins. It does not hurt at all, if you use it on your legs. But, if you use it at your under arm, it won't be a pleasant feeling, of course! I will never use anything else besides this for hair removing anymore! highly recommended!",
|
404 |
-
"id": "79"
|
405 |
-
},
|
406 |
-
"80": {
|
407 |
-
"label": false,
|
408 |
-
"text": "This is not a toy: I guess I was expecting more out of these leave window decals. I just didn't find them attractive after placing them on my window, they seem very cheap, I guess because they are cheap.I threw them away.",
|
409 |
-
"id": "80"
|
410 |
-
},
|
411 |
-
"81": {
|
412 |
-
"label": true,
|
413 |
-
"text": "Wonderful book for anyone running a professional hatchery: This book is aimed more for hatcheries that are raising Trout, Salmon, Catfish and other food fishes. However, there is so much information in this book that even ornamental fish hatcheries will find an incredible amount of useful information. The chapters on Fish Nutrition are especially helpful.",
|
414 |
-
"id": "81"
|
415 |
-
},
|
416 |
-
"82": {
|
417 |
-
"label": true,
|
418 |
-
"text": "Amazing book!!: Once again, Eric Victorino's artistic talent is put into this great free-verse poetry book. I couldn't put it down and I finished it the day I received it in the mail. All of the poems are awesome but the one I found the most interesting was \"It's A People Business.\" All of the experiences in his life, personally and with his band, come to life in this book. Please check it out! It's worth every penny!!",
|
419 |
-
"id": "82"
|
420 |
-
},
|
421 |
-
"83": {
|
422 |
-
"label": true,
|
423 |
-
"text": "The white trumpet contender respect Miles Davis!: The story of the Jazz in the Fifties certainly would be remain unfinished without the ominous presence of this outstanding virtuoso. Baker sound still possesses this alluring hook, this magnetic engagement charm, eloquent expressiveness, enrapturing lyricism and contagious rhythm, despite the elapsed time, which confirms by itself the status of his musicianship.This selection is jus a little sample of the broad universe of his genius. A well thought selection of great musical successes, available, preserved and immortalized by the Digital Technology for our future enjoyment.Absolutely indispensable in your treasured collection.",
|
424 |
-
"id": "83"
|
425 |
-
},
|
426 |
-
"84": {
|
427 |
-
"label": false,
|
428 |
-
"text": "What the?: I'm sorry, maybe it's just me but I can't helping stating that this has to be one of the wrost movies I've seen in my life!Can you say boring? Can you say doesn't make sense at all? The first 30 minutes of the movie were O.K. But it went downhill after that. This movie is a prime example of a director attempting to make a deep movie with a meaningful lesson but failed on all levels. I don't recommend this movie unless you want to go to sleep or you don't have anything else to do.",
|
429 |
-
"id": "84"
|
430 |
-
},
|
431 |
-
"85": {
|
432 |
-
"label": true,
|
433 |
-
"text": "very very good!!!!: linda blair is a young girl who is possessed. and her mother doesn't know what to do until one day when she hears her daughter screaming and stabbind herself she knows what to do GET AN EXORCIZIM!!!",
|
434 |
-
"id": "85"
|
435 |
-
},
|
436 |
-
"86": {
|
437 |
-
"label": true,
|
438 |
-
"text": "Awesome product for the price!: This range extender works as advertised! I am very happy with the purchase. I was a little worried after reading some of the horror stories here, but I have to say, Chovy's review instructions (on this site) were just this ticket to get the repeater up and running in less than 30 minutes. It was unbelievably easy to install! Do not be frightened by negative reviews. If you can set up a wireless network, you can set up this repeater. However, I did upgrade the firmware before I did anything else and maybe that helped. I got the firmware update from the Belkin site.",
|
439 |
-
"id": "86"
|
440 |
-
},
|
441 |
-
"87": {
|
442 |
-
"label": false,
|
443 |
-
"text": "Slight: This book is either a heavily illustrated short story collection or a text-heavy comic. Its unusual format is its most original feature. Its plots are negligible, but its illustrations and text evoke a unique atmosphere of self-conscious nonconformism. Although its target audience is dare-to-be-different teens and college students, its interesting turns of phrase and expressive line drawings are not devoid of interest for general audences.",
|
444 |
-
"id": "87"
|
445 |
-
},
|
446 |
-
"88": {
|
447 |
-
"label": true,
|
448 |
-
"text": "ANgeleyes: Seem to dry up their eyes fairly well, although I haven't seen the color (brown stain) change much yet.",
|
449 |
-
"id": "88"
|
450 |
-
},
|
451 |
-
"89": {
|
452 |
-
"label": false,
|
453 |
-
"text": "Nice Try: Salt Lake 2002 is not a bad game, but it isn't good either. The graphics are excellent, but some of the events are bad. Bobsleigh, and skiing aren't bad but the others are. You dont stay into it for long. I liked it for a while, but it gets boring.",
|
454 |
-
"id": "89"
|
455 |
-
},
|
456 |
-
"90": {
|
457 |
-
"label": false,
|
458 |
-
"text": "Cutler's share of the pie: This book was a major disappointment. I am familiar with books written solely by the Dalai Lama, such as the \"Library of Tibet\" series, which are much more engrossing and have much more substance than Cutler's book. Cutler attempts (successfully, sadly) to have his share of the profitable market that involves the Dalai Lama's writings. The book is insipid, does not try to explain any important issue in the light of Buddhist philosophy, and only rehashes issues that several other westerners already wrote about. It's another big ego trip: we keep hearing time and again about his opportunities to be with the Dalai Lama. What a shame, Cutler. I sold the book as soon as I finished it.",
|
459 |
-
"id": "90"
|
460 |
-
},
|
461 |
-
"91": {
|
462 |
-
"label": false,
|
463 |
-
"text": "Mostly tedious, with interesting parts: I found the writing interesting, and the subject fascinating, but I found myself frustrated by the author's difficulty in talking directly about the status of Muslim women with her interview subjects. The author spent many pages writing about the menus and dress of the many middle and upper-middle class women she interviewed. It seemed as though her interview subjects resisted her efforts to discuss the status of women in their countries, so we too as readers had to wade through much distracting material and misunderstandings about feminism and gender. Great travel stories, but not a great source of information about Muslim women.",
|
464 |
-
"id": "91"
|
465 |
-
},
|
466 |
-
"92": {
|
467 |
-
"label": false,
|
468 |
-
"text": "Sesame Street Toddler: I did not find this game to be as educationally sound as I would expect from Sesame street. There is too much talking before the program will react to a command. The graphics are jerky and the cursor acts like the target is magnetically charged and keeps pushing away the cursor. When the child actually does manage to click on a target, the cursor may still fly to another target and the child is told that his answer is wrong. Another example of educational problems is the pronunciation of \"eggs\" using a long \"a\" sound instead of a short \"e.\" This is not very helpful in teaching a child the sound for short \"e.\" Children that are used to playing computer games by themselves may find that this game is too frustrating to do alone. The open ended learning curve is a great idea. I just wish Sesame Street would hire a truly qualified literacy expert to help clean up the many problems in this program.",
|
469 |
-
"id": "92"
|
470 |
-
},
|
471 |
-
"93": {
|
472 |
-
"label": false,
|
473 |
-
"text": "needs a buzz cut and a point: I avoided reading this book, not because of the hermaphrodite subject matter, but because I have never read a multigenerational family saga that I liked. Many books let me down in the middle, and this was no exception. The beginning of the book was incredible and harrowing, with momentum and characterization. The post-America nextgens part of the saga was so boring I found myself flipping and flipping - always a bad sign. If there was some kind of larger point to all of that, then I must have missed it. Yes there's the identity duality and trinity themes playing out here: man/woman, greek/turkish/american modern/old world sick/healthy innocent/guilty original/reinvented. But it was almost as if the author was saying - here it is again - get it? I like my fiction much more subtle than this.",
|
474 |
-
"id": "93"
|
475 |
-
},
|
476 |
-
"94": {
|
477 |
-
"label": false,
|
478 |
-
"text": "OMG! DO NOT BUY!: I normally don't take the time to submit a review.In this case however, I feel obligated to do so.This is by far one of the worst purchases I have ever made.Here's why.....The contraption is far too bulky.The case's enclosing is unbearable, takes a good minute or so to open it.The texture of the material feels like a cheap toy.The overall design is horrible, something I could make in my basement.For the love of everything sacred, do not buy this thing.",
|
479 |
-
"id": "94"
|
480 |
-
},
|
481 |
-
"95": {
|
482 |
-
"label": true,
|
483 |
-
"text": "Good price, good quality: Comparable HDMI cables can be bought for 45 or more. Even though the price is cheap the quality is good, no problems so far.",
|
484 |
-
"id": "95"
|
485 |
-
},
|
486 |
-
"96": {
|
487 |
-
"label": true,
|
488 |
-
"text": "Good rock music: This is what i call rock music good beat and good lyrics, don't listen to the other reviews. This cd is one of the best, listen to a few songs and you will get hooked. I recommend this cd its awesome.",
|
489 |
-
"id": "96"
|
490 |
-
},
|
491 |
-
"97": {
|
492 |
-
"label": false,
|
493 |
-
"text": "BORING!: This movie is soo boring. How in the hell did this movie make so much at the box office. Do people really want to pay for crappy movies like this. bottom line this is a chick flick nothing is good. And now they are re-releasing this movie with more boring stuff. This is the worst movie ever.",
|
494 |
-
"id": "97"
|
495 |
-
},
|
496 |
-
"98": {
|
497 |
-
"label": false,
|
498 |
-
"text": "Already Rusting: Inferior quality. The plating is thin and rust is coming through the finish. Inexcusable for a product that is designed for use in a humid environment.",
|
499 |
-
"id": "98"
|
500 |
-
},
|
501 |
-
"99": {
|
502 |
-
"label": false,
|
503 |
-
"text": "confusing internet setup: i wanted a camera that could email photos but this camera will not go out through the router and the manual setup , to punch a hole thru router is confusing.",
|
504 |
-
"id": "99"
|
505 |
-
},
|
506 |
-
"55066581ad334ef5844c6f7707525010": {
|
507 |
-
"label": true,
|
508 |
-
"text": "Thought this was super cool, and a really important step in all the physical books' preservation.",
|
509 |
-
"id": "55066581ad334ef5844c6f7707525010"
|
510 |
-
},
|
511 |
-
"fef14d13366f482d9f4e0726b357f178": {
|
512 |
-
"label": true,
|
513 |
-
"text": "There are some amazing hikes around Mt. Fuji.",
|
514 |
-
"id": "fef14d13366f482d9f4e0726b357f178"
|
515 |
-
},
|
516 |
-
"70aed7369aa74031a06f5f3155476d7c": {
|
517 |
-
"label": true,
|
518 |
-
"text": "Thought this was super cool, and a really important step in preserving all the physical books.",
|
519 |
-
"id": "70aed7369aa74031a06f5f3155476d7c"
|
520 |
-
},
|
521 |
-
"ac65d14b710648b8bf3c2a53caf6ac91": {
|
522 |
-
"label": false,
|
523 |
-
"text": "The profits of the business that was most successful were still negative.",
|
524 |
-
"id": "ac65d14b710648b8bf3c2a53caf6ac91"
|
525 |
-
},
|
526 |
-
"ce00e6b1547444259a13c55654e66500": {
|
527 |
-
"label": true,
|
528 |
-
"text": "love them best, they reconnect in hysterically funny and emotionally significant ways.",
|
529 |
-
"id": "ce00e6b1547444259a13c55654e66500"
|
530 |
-
},
|
531 |
-
"8943a94d205b43ceb4420d5ab9c5611a": {
|
532 |
-
"label": true,
|
533 |
-
"text": "Walt Disney's timeless masterpiece is an extravaganza of sight and sound! See the music come to life, hear the pictures burst into song and experience the excitement that is Fantasia over and over again.",
|
534 |
-
"id": "8943a94d205b43ceb4420d5ab9c5611a"
|
535 |
-
},
|
536 |
-
"6af8fc3dd30d4f8caf5a2929fc88534b": {
|
537 |
-
"label": false,
|
538 |
-
"text": "A director struggles with a difficult sex scene between a young actor and actress who can't stand one another. Aided by her loyal assistant, she is hell-bent on getting the scene right without compromise.",
|
539 |
-
"id": "6af8fc3dd30d4f8caf5a2929fc88534b"
|
540 |
-
},
|
541 |
-
"dbe571ed810d40f48170147dcab1c90f": {
|
542 |
-
"label": false,
|
543 |
-
"text": "sound created by drawing directly on the soundtrack).",
|
544 |
-
"id": "dbe571ed810d40f48170147dcab1c90f"
|
545 |
-
},
|
546 |
-
"682102dfc5494f03926d16ae947a6250": {
|
547 |
-
"label": true,
|
548 |
-
"text": "one of glowing admiration! Written by Mark Toscano",
|
549 |
-
"id": "682102dfc5494f03926d16ae947a6250"
|
550 |
-
},
|
551 |
-
"9b044458bb0e4bd68359e62d5fb4b979": {
|
552 |
-
"label": false,
|
553 |
-
"text": "Seth McArdle (Samuel Davis) is a high school senior with an especially full plate. Not only must he navigate the usual social and academic pitfalls of high school, but he has to contend with his young twin sisters, serving as de facto parent in the absence of his deceased mother and deadbeat father. The pressure mounts when the bank calls with a foreclosure warning, and Seth's frustrations spill",
|
554 |
-
"id": "9b044458bb0e4bd68359e62d5fb4b979"
|
555 |
-
},
|
556 |
-
"abf2d24c7d8845769b7368be28f2c25d": {
|
557 |
-
"label": true,
|
558 |
-
"text": "Bjork is a beautiful creature and her music is stellar to anything I've ever heard. This DVD is essential for all Bjork fans, because you find something new every time you watch it.",
|
559 |
-
"id": "abf2d24c7d8845769b7368be28f2c25d"
|
560 |
-
}
|
561 |
-
},
|
562 |
-
"version": 11,
|
563 |
-
"description": "Positive sentiment"
|
564 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/concept/lilac/positive-sentiment/sbert.pkl
DELETED
Binary file (94.4 kB)
|
|
data/concept/lilac/profanity/concept.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/concept/lilac/profanity/openai.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a97028bbb8a75913874b83c768c5cdc8ad9ec00aba1ce4296429dd7326165ad7
|
3 |
-
size 3247822
|
|
|
|
|
|
|
|
data/concept/lilac/profanity/sbert.pkl
DELETED
Binary file (844 kB)
|
|
data/concept/lilac/toxicity/concept.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/concept/lilac/toxicity/sbert.pkl
DELETED
Binary file (958 kB)
|
|
data/datasets/local/spotify/data-00000-of-00001.parquet
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:32224657332b09187a737c73ab634f9d14c9ba9a240bd105f1b9819cde2afcef
|
3 |
-
size 37128682
|
|
|
|
|
|
|
|
data/datasets/local/spotify/manifest.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"files": [
|
3 |
-
"data-00000-of-00001.parquet"
|
4 |
-
],
|
5 |
-
"data_schema": {
|
6 |
-
"fields": {
|
7 |
-
"artist": {
|
8 |
-
"dtype": "string"
|
9 |
-
},
|
10 |
-
"song": {
|
11 |
-
"dtype": "string"
|
12 |
-
},
|
13 |
-
"link": {
|
14 |
-
"dtype": "string"
|
15 |
-
},
|
16 |
-
"text": {
|
17 |
-
"dtype": "string"
|
18 |
-
},
|
19 |
-
"__line_number__": {
|
20 |
-
"dtype": "int64"
|
21 |
-
},
|
22 |
-
"__rowid__": {
|
23 |
-
"dtype": "string"
|
24 |
-
}
|
25 |
-
}
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/datasets/local/spotify/settings.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"ui": {"media_paths": [["text"]]}}
|
|
|
|
data/datasets/local/spotify/text/.concepts/local/aliens/sbert-neg-100.pkl
DELETED
Binary file (169 kB)
|
|
data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0f1555427c8dc3b2f1e9310f5e71b46297e607f710365e107c73c894d5a8e1b0
|
3 |
-
size 2033407
|
|
|
|
|
|
|
|
data/datasets/local/spotify/text/lang_detection/signal_manifest.json
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"files": [
|
3 |
-
"data-00000-of-00001.parquet"
|
4 |
-
],
|
5 |
-
"parquet_id": "lang_detection(text)",
|
6 |
-
"data_schema": {
|
7 |
-
"fields": {
|
8 |
-
"__rowid__": {
|
9 |
-
"dtype": "string"
|
10 |
-
},
|
11 |
-
"text": {
|
12 |
-
"fields": {
|
13 |
-
"lang_detection": {
|
14 |
-
"repeated_field": {
|
15 |
-
"fields": {
|
16 |
-
"lang_code": {
|
17 |
-
"dtype": "string"
|
18 |
-
}
|
19 |
-
},
|
20 |
-
"dtype": "string_span"
|
21 |
-
},
|
22 |
-
"signal": {
|
23 |
-
"signal_name": "lang_detection"
|
24 |
-
}
|
25 |
-
}
|
26 |
-
}
|
27 |
-
}
|
28 |
-
}
|
29 |
-
},
|
30 |
-
"signal": {
|
31 |
-
"signal_name": "lang_detection"
|
32 |
-
},
|
33 |
-
"enriched_path": [
|
34 |
-
"text"
|
35 |
-
]
|
36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9796beb630cc3503f3c2ac9db8f71e4c1604570836d78bbf364e801cd427c39e
|
3 |
-
size 2709987
|
|
|
|
|
|
|
|
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d1ba0fe68cc02849b0a20b7f72047c8e9cb8e5ef5b57b0cd642fa0b0be8a6e06
|
3 |
-
size 3340135
|
|
|
|
|
|
|
|
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/signal_manifest.json
DELETED
@@ -1,64 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"files": [
|
3 |
-
"data-00000-of-00001.parquet"
|
4 |
-
],
|
5 |
-
"parquet_id": "local/outerspace/v34(text.sbert.*.embedding)",
|
6 |
-
"data_schema": {
|
7 |
-
"fields": {
|
8 |
-
"__rowid__": {
|
9 |
-
"dtype": "string"
|
10 |
-
},
|
11 |
-
"text": {
|
12 |
-
"fields": {
|
13 |
-
"sbert": {
|
14 |
-
"repeated_field": {
|
15 |
-
"fields": {
|
16 |
-
"embedding": {
|
17 |
-
"fields": {
|
18 |
-
"local/outerspace/v34": {
|
19 |
-
"dtype": "float32",
|
20 |
-
"signal": {
|
21 |
-
"signal_name": "concept_score",
|
22 |
-
"embedding": "sbert",
|
23 |
-
"namespace": "local",
|
24 |
-
"concept_name": "outerspace",
|
25 |
-
"draft": "main",
|
26 |
-
"num_negative_examples": 100
|
27 |
-
},
|
28 |
-
"bins": [
|
29 |
-
[
|
30 |
-
"Not in concept",
|
31 |
-
null,
|
32 |
-
0.5
|
33 |
-
],
|
34 |
-
[
|
35 |
-
"In concept",
|
36 |
-
0.5,
|
37 |
-
null
|
38 |
-
]
|
39 |
-
]
|
40 |
-
}
|
41 |
-
}
|
42 |
-
}
|
43 |
-
}
|
44 |
-
}
|
45 |
-
}
|
46 |
-
}
|
47 |
-
}
|
48 |
-
}
|
49 |
-
},
|
50 |
-
"signal": {
|
51 |
-
"signal_name": "concept_score",
|
52 |
-
"embedding": "sbert",
|
53 |
-
"namespace": "local",
|
54 |
-
"concept_name": "outerspace",
|
55 |
-
"draft": "main",
|
56 |
-
"num_negative_examples": 100
|
57 |
-
},
|
58 |
-
"enriched_path": [
|
59 |
-
"text",
|
60 |
-
"sbert",
|
61 |
-
"*",
|
62 |
-
"embedding"
|
63 |
-
]
|
64 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d5df43291782b8c731d4ce56537946654c642a01dc9a4e37de394836362f6b45
|
3 |
-
size 3727400
|
|
|
|
|
|
|
|
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:94e10c23d7229541e1f60b791a659d13673b10a03649abf0ae092e0e18c5aee3
|
3 |
-
size 170446976
|
|
|
|
|
|
|
|
data/datasets/local/spotify/text/sbert/signal_manifest.json
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"files": [
|
3 |
-
"data-00000-of-00001.parquet"
|
4 |
-
],
|
5 |
-
"parquet_id": "sbert(text)",
|
6 |
-
"data_schema": {
|
7 |
-
"fields": {
|
8 |
-
"__rowid__": {
|
9 |
-
"dtype": "string"
|
10 |
-
},
|
11 |
-
"text": {
|
12 |
-
"fields": {
|
13 |
-
"sbert": {
|
14 |
-
"repeated_field": {
|
15 |
-
"fields": {
|
16 |
-
"embedding": {
|
17 |
-
"dtype": "embedding"
|
18 |
-
}
|
19 |
-
},
|
20 |
-
"dtype": "string_span"
|
21 |
-
},
|
22 |
-
"signal": {
|
23 |
-
"signal_name": "sbert"
|
24 |
-
}
|
25 |
-
}
|
26 |
-
}
|
27 |
-
}
|
28 |
-
}
|
29 |
-
},
|
30 |
-
"signal": {
|
31 |
-
"signal_name": "sbert"
|
32 |
-
},
|
33 |
-
"enriched_path": [
|
34 |
-
"text"
|
35 |
-
],
|
36 |
-
"embedding_filename_prefix": "embeddings-00000-of-00001"
|
37 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
lilac/concepts/concept.py
CHANGED
@@ -322,7 +322,8 @@ class ConceptModel:
|
|
322 |
def _calibrate_on_dataset(self, column_info: ConceptColumnInfo) -> None:
|
323 |
"""Calibrate the model on the embeddings in the provided vector store."""
|
324 |
db = get_dataset(column_info.namespace, column_info.name)
|
325 |
-
|
|
|
326 |
keys = vector_store.keys()
|
327 |
num_samples = min(column_info.num_negative_examples, len(keys))
|
328 |
sample_keys = random.sample(keys, num_samples)
|
@@ -354,12 +355,10 @@ class ConceptModel:
|
|
354 |
|
355 |
item_result: list[Item] = []
|
356 |
for embedding_item, score in zip(item, scores):
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
metadata={f'{self.namespace}/{self.concept_name}': score}))
|
362 |
-
result_items.append({self.embedding_name: item_result})
|
363 |
return result_items
|
364 |
|
365 |
def coef(self, draft: DraftId) -> np.ndarray:
|
|
|
322 |
def _calibrate_on_dataset(self, column_info: ConceptColumnInfo) -> None:
|
323 |
"""Calibrate the model on the embeddings in the provided vector store."""
|
324 |
db = get_dataset(column_info.namespace, column_info.name)
|
325 |
+
vector_index = db.get_vector_db_index(self.embedding_name, normalize_path(column_info.path))
|
326 |
+
vector_store = vector_index.get_vector_store()
|
327 |
keys = vector_store.keys()
|
328 |
num_samples = min(column_info.num_negative_examples, len(keys))
|
329 |
sample_keys = random.sample(keys, num_samples)
|
|
|
355 |
|
356 |
item_result: list[Item] = []
|
357 |
for embedding_item, score in zip(item, scores):
|
358 |
+
span = embedding_item[VALUE_KEY]
|
359 |
+
start, end = span[TEXT_SPAN_START_FEATURE], span[TEXT_SPAN_END_FEATURE]
|
360 |
+
item_result.append(lilac_span(start, end, {'score': score}))
|
361 |
+
result_items.append(item_result)
|
|
|
|
|
362 |
return result_items
|
363 |
|
364 |
def coef(self, draft: DraftId) -> np.ndarray:
|
lilac/config.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
import os
|
3 |
from typing import Any, Literal, Optional, Union, cast
|
4 |
|
5 |
-
from dotenv import
|
6 |
|
7 |
EnvironmentKeys = Union[Literal['LILAC_DATA_PATH'],
|
8 |
# Authentication on the demo.
|
@@ -20,41 +20,44 @@ EnvironmentKeys = Union[Literal['LILAC_DATA_PATH'],
|
|
20 |
Literal['DUCKDB_USE_VIEWS'],
|
21 |
# Debugging
|
22 |
Literal['DEBUG'], Literal['DISABLE_LOGS']]
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
|
26 |
def env(key: EnvironmentKeys, default: Optional[Any] = None) -> Any:
|
27 |
"""Return the value of an environment variable."""
|
28 |
-
|
29 |
-
first_load = False
|
30 |
-
# This is done lazily so we can prevent loading local environment variables when testing. The
|
31 |
-
# 'PYTEST_CURRENT_TEST' environment variable is only set after module initialization by pytest.
|
32 |
-
|
33 |
-
if _ENV is None:
|
34 |
-
in_test = os.environ.get('LILAC_TEST', None)
|
35 |
-
_ENV = {
|
36 |
-
**dotenv_values('.env'), # load shared variables
|
37 |
-
**dotenv_values('.env.demo'), # load demo-specific environment flags.
|
38 |
-
**(dotenv_values('.env.local') if not in_test else {})
|
39 |
-
}
|
40 |
-
first_load = True
|
41 |
-
|
42 |
-
# Override the file based configs with the current environment, in case flags have changed.
|
43 |
-
environment = {**_ENV, **os.environ}
|
44 |
-
|
45 |
-
if first_load:
|
46 |
-
if environment.get('LILAC_AUTH_ENABLED', None):
|
47 |
-
if not environment.get('GOOGLE_CLIENT_ID', None) or not environment.get(
|
48 |
-
'GOOGLE_CLIENT_SECRET', None):
|
49 |
-
raise ValueError(
|
50 |
-
'Missing `GOOGLE_CLIENT_ID` or `GOOGLE_CLIENT_SECRET` when `LILAC_AUTH_ENABLED=true`')
|
51 |
-
SECRET_KEY = environment.get('LILAC_OAUTH_SECRET_KEY', None)
|
52 |
-
if not SECRET_KEY:
|
53 |
-
raise ValueError('Missing `LILAC_OAUTH_SECRET_KEY` when `LILAC_AUTH_ENABLED=true`')
|
54 |
-
|
55 |
-
return environment.get(key, default)
|
56 |
|
57 |
|
58 |
def data_path() -> str:
|
59 |
"""Return the base path for data."""
|
60 |
return cast(str, env('LILAC_DATA_PATH', './data'))
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
from typing import Any, Literal, Optional, Union, cast
|
4 |
|
5 |
+
from dotenv import load_dotenv
|
6 |
|
7 |
EnvironmentKeys = Union[Literal['LILAC_DATA_PATH'],
|
8 |
# Authentication on the demo.
|
|
|
20 |
Literal['DUCKDB_USE_VIEWS'],
|
21 |
# Debugging
|
22 |
Literal['DEBUG'], Literal['DISABLE_LOGS']]
|
23 |
+
|
24 |
+
|
25 |
+
def _init_env() -> None:
|
26 |
+
in_test = os.environ.get('LILAC_TEST', None)
|
27 |
+
# Load the .env files into the environment in order of highest to lowest priority.
|
28 |
+
|
29 |
+
if not in_test: # Skip local environment variables when testing.
|
30 |
+
load_dotenv('.env.local')
|
31 |
+
load_dotenv('.env.demo')
|
32 |
+
load_dotenv('.env')
|
33 |
+
|
34 |
+
if os.environ.get('LILAC_AUTH_ENABLED', None):
|
35 |
+
if not os.environ.get('GOOGLE_CLIENT_ID', None) or not os.environ.get(
|
36 |
+
'GOOGLE_CLIENT_SECRET', None):
|
37 |
+
raise ValueError(
|
38 |
+
'Missing `GOOGLE_CLIENT_ID` or `GOOGLE_CLIENT_SECRET` when `LILAC_AUTH_ENABLED=true`')
|
39 |
+
SECRET_KEY = os.environ.get('LILAC_OAUTH_SECRET_KEY', None)
|
40 |
+
if not SECRET_KEY:
|
41 |
+
raise ValueError('Missing `LILAC_OAUTH_SECRET_KEY` when `LILAC_AUTH_ENABLED=true`')
|
42 |
+
if os.environ.get('LILAC_AUTH_ENABLED', None):
|
43 |
+
if not os.environ.get('GOOGLE_CLIENT_ID', None) or not os.environ.get(
|
44 |
+
'GOOGLE_CLIENT_SECRET', None):
|
45 |
+
raise ValueError(
|
46 |
+
'Missing `GOOGLE_CLIENT_ID` or `GOOGLE_CLIENT_SECRET` when `LILAC_AUTH_ENABLED=true`')
|
47 |
+
SECRET_KEY = os.environ.get('LILAC_OAUTH_SECRET_KEY', None)
|
48 |
+
if not SECRET_KEY:
|
49 |
+
raise ValueError('Missing `LILAC_OAUTH_SECRET_KEY` when `LILAC_AUTH_ENABLED=true`')
|
50 |
|
51 |
|
52 |
def env(key: EnvironmentKeys, default: Optional[Any] = None) -> Any:
|
53 |
"""Return the value of an environment variable."""
|
54 |
+
return os.environ.get(key, default)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
|
57 |
def data_path() -> str:
|
58 |
"""Return the base path for data."""
|
59 |
return cast(str, env('LILAC_DATA_PATH', './data'))
|
60 |
+
|
61 |
+
|
62 |
+
# Initialize the environment at import time.
|
63 |
+
_init_env()
|
lilac/data/dataset.py
CHANGED
@@ -12,7 +12,7 @@ from pydantic import Field as PydanticField
|
|
12 |
from pydantic import StrictBool, StrictBytes, StrictFloat, StrictInt, StrictStr, validator
|
13 |
|
14 |
from ..auth import UserInfo
|
15 |
-
from ..embeddings.vector_store import
|
16 |
from ..schema import VALUE_KEY, Bin, DataType, Path, PathTuple, Schema, normalize_path
|
17 |
from ..signals.signal import Signal, TextEmbeddingSignal, get_signal_by_type, resolve_signal
|
18 |
from ..tasks import TaskStepId
|
@@ -165,7 +165,8 @@ ColumnId = Union[Path, Column]
|
|
165 |
|
166 |
class DatasetUISettings(BaseModel):
|
167 |
"""The UI persistent settings for a dataset."""
|
168 |
-
media_paths:
|
|
|
169 |
|
170 |
|
171 |
class DatasetSettings(BaseModel):
|
@@ -282,9 +283,8 @@ class Dataset(abc.ABC):
|
|
282 |
pass
|
283 |
|
284 |
@abc.abstractmethod
|
285 |
-
def
|
286 |
-
|
287 |
-
"""Get the vector store for a column."""
|
288 |
pass
|
289 |
|
290 |
@abc.abstractmethod
|
@@ -462,9 +462,9 @@ def default_settings(dataset: Dataset) -> DatasetSettings:
|
|
462 |
stats: list[StatsResult] = list(pool.map(lambda leaf: dataset.stats(leaf), leaf_paths))
|
463 |
sorted_stats = sorted([stat for stat in stats if stat.avg_text_length],
|
464 |
key=lambda stat: stat.avg_text_length or -1.0)
|
465 |
-
media_paths =
|
466 |
if sorted_stats:
|
467 |
-
media_paths = [sorted_stats[-1].path]
|
468 |
|
469 |
return DatasetSettings(ui=DatasetUISettings(media_paths=media_paths))
|
470 |
|
|
|
12 |
from pydantic import StrictBool, StrictBytes, StrictFloat, StrictInt, StrictStr, validator
|
13 |
|
14 |
from ..auth import UserInfo
|
15 |
+
from ..embeddings.vector_store import VectorDBIndex
|
16 |
from ..schema import VALUE_KEY, Bin, DataType, Path, PathTuple, Schema, normalize_path
|
17 |
from ..signals.signal import Signal, TextEmbeddingSignal, get_signal_by_type, resolve_signal
|
18 |
from ..tasks import TaskStepId
|
|
|
165 |
|
166 |
class DatasetUISettings(BaseModel):
|
167 |
"""The UI persistent settings for a dataset."""
|
168 |
+
media_paths: set[PathTuple] = set()
|
169 |
+
markdown_paths: set[PathTuple] = set()
|
170 |
|
171 |
|
172 |
class DatasetSettings(BaseModel):
|
|
|
283 |
pass
|
284 |
|
285 |
@abc.abstractmethod
|
286 |
+
def get_vector_db_index(self, embedding: str, path: PathTuple) -> VectorDBIndex:
|
287 |
+
"""Get the vector index for a path and an embedding."""
|
|
|
288 |
pass
|
289 |
|
290 |
@abc.abstractmethod
|
|
|
462 |
stats: list[StatsResult] = list(pool.map(lambda leaf: dataset.stats(leaf), leaf_paths))
|
463 |
sorted_stats = sorted([stat for stat in stats if stat.avg_text_length],
|
464 |
key=lambda stat: stat.avg_text_length or -1.0)
|
465 |
+
media_paths: set[PathTuple] = set()
|
466 |
if sorted_stats:
|
467 |
+
media_paths = set([sorted_stats[-1].path])
|
468 |
|
469 |
return DatasetSettings(ui=DatasetUISettings(media_paths=media_paths))
|
470 |
|
lilac/data/dataset_duckdb.py
CHANGED
@@ -19,7 +19,7 @@ from typing_extensions import override
|
|
19 |
from ..auth import UserInfo
|
20 |
from ..concepts.concept import ConceptColumnInfo
|
21 |
from ..config import data_path, env
|
22 |
-
from ..embeddings.vector_store import VectorStore
|
23 |
from ..embeddings.vector_store_numpy import NumpyVectorStore
|
24 |
from ..schema import (
|
25 |
MANIFEST_FILENAME,
|
@@ -33,32 +33,31 @@ from ..schema import (
|
|
33 |
Field,
|
34 |
Item,
|
35 |
Path,
|
|
|
36 |
PathTuple,
|
37 |
RichData,
|
38 |
Schema,
|
39 |
-
SignalInputType,
|
40 |
SourceManifest,
|
41 |
-
VectorKey,
|
42 |
column_paths_match,
|
43 |
is_float,
|
44 |
is_integer,
|
45 |
is_ordinal,
|
46 |
is_temporal,
|
47 |
normalize_path,
|
48 |
-
|
49 |
)
|
50 |
from ..signals.concept_labels import ConceptLabelsSignal
|
51 |
from ..signals.concept_scorer import ConceptScoreSignal
|
52 |
from ..signals.semantic_similarity import SemanticSimilaritySignal
|
53 |
from ..signals.signal import (
|
54 |
-
EMBEDDING_KEY,
|
55 |
Signal,
|
56 |
-
TextEmbeddingModelSignal,
|
57 |
TextEmbeddingSignal,
|
|
|
|
|
58 |
resolve_signal,
|
59 |
)
|
60 |
from ..signals.substring_search import SubstringSignal
|
61 |
-
from ..tasks import TaskStepId,
|
62 |
from ..utils import DebugTimer, get_dataset_output_dir, log, open_file
|
63 |
from . import dataset
|
64 |
from .dataset import (
|
@@ -95,13 +94,12 @@ from .dataset_utils import (
|
|
95 |
flatten,
|
96 |
flatten_keys,
|
97 |
merge_schemas,
|
98 |
-
|
99 |
-
replace_embeddings_with_none,
|
100 |
schema_contains_path,
|
101 |
sparse_to_dense_compute,
|
102 |
unflatten,
|
103 |
wrap_in_dicts,
|
104 |
-
|
105 |
write_items_to_parquet,
|
106 |
)
|
107 |
|
@@ -156,8 +154,8 @@ class DatasetDuckDB(Dataset):
|
|
156 |
self._signal_manifests: list[SignalManifest] = []
|
157 |
self.con = duckdb.connect(database=':memory:')
|
158 |
|
159 |
-
# Maps a
|
160 |
-
self.
|
161 |
self.vector_store_cls = vector_store_cls
|
162 |
self._manifest_lock = threading.Lock()
|
163 |
|
@@ -196,7 +194,8 @@ class DatasetDuckDB(Dataset):
|
|
196 |
signal_manifest = SignalManifest.parse_raw(f.read())
|
197 |
self._signal_manifests.append(signal_manifest)
|
198 |
signal_files = [os.path.join(root, f) for f in signal_manifest.files]
|
199 |
-
|
|
|
200 |
|
201 |
merged_schema = merge_schemas([self._source_manifest.data_schema] +
|
202 |
[m.data_schema for m in self._signal_manifests])
|
@@ -212,10 +211,13 @@ class DatasetDuckDB(Dataset):
|
|
212 |
# NOTE: "root_column" for each signal is defined as the top-level column.
|
213 |
select_sql = ', '.join([f'{SOURCE_VIEW_NAME}.*'] + [(
|
214 |
f'{_escape_col_name(manifest.parquet_id)}.{_escape_col_name(_root_column(manifest))} '
|
215 |
-
f'AS {_escape_col_name(manifest.parquet_id)}')
|
|
|
|
|
216 |
join_sql = ' '.join([SOURCE_VIEW_NAME] + [
|
217 |
f'join {_escape_col_name(manifest.parquet_id)} using ({UUID_COLUMN},)'
|
218 |
for manifest in self._signal_manifests
|
|
|
219 |
])
|
220 |
view_or_table = 'TABLE'
|
221 |
use_views = env('DUCKDB_USE_VIEWS', 0) or 0
|
@@ -267,105 +269,42 @@ class DatasetDuckDB(Dataset):
|
|
267 |
raise NotImplementedError('count is not yet implemented for DuckDB.')
|
268 |
|
269 |
@override
|
270 |
-
def
|
271 |
# Refresh the manifest to make sure we have the latest signal manifests.
|
272 |
self.manifest()
|
|
|
|
|
|
|
273 |
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
# Get all the embeddings and pass it to the vector store.
|
296 |
-
vector_store = self.vector_store_cls()
|
297 |
-
vector_store.add(keys, embeddings)
|
298 |
-
# Cache the vector store.
|
299 |
-
self._col_vector_stores[path] = vector_store
|
300 |
-
|
301 |
-
return self._col_vector_stores[path]
|
302 |
-
|
303 |
-
def _prepare_signal(
|
304 |
-
self,
|
305 |
-
signal: Signal,
|
306 |
-
source_path: PathTuple,
|
307 |
-
manifest: DatasetManifest,
|
308 |
-
compute_dependencies: Optional[bool] = False,
|
309 |
-
task_step_id: Optional[TaskStepId] = None) -> tuple[PathTuple, Optional[TaskStepId]]:
|
310 |
-
"""Run all the signals dependencies required to run this signal.
|
311 |
-
|
312 |
-
Args:
|
313 |
-
signal: The signal to prepare.
|
314 |
-
source_path: The source path the signal is running over.
|
315 |
-
compute_dependencies: If True, signals will get computed for the whole column. If False,
|
316 |
-
throw if the required inputs are not computed yet.
|
317 |
-
task_step_id: The TaskStepId used to run the signal.
|
318 |
-
|
319 |
-
Returns
|
320 |
-
The final path the signal will be run over and the new step id for the final signal.
|
321 |
-
"""
|
322 |
-
is_value_path = False
|
323 |
-
if source_path[-1] == VALUE_KEY:
|
324 |
-
is_value_path = True
|
325 |
-
source_path = source_path[:-1]
|
326 |
-
|
327 |
-
new_path = source_path
|
328 |
-
|
329 |
-
signals_to_compute: list[tuple[PathTuple, Signal]] = []
|
330 |
-
if isinstance(signal, TextEmbeddingModelSignal):
|
331 |
-
embedding_signal = signal.get_embedding_signal()
|
332 |
-
new_path = (*new_path, embedding_signal.key(), PATH_WILDCARD, EMBEDDING_KEY)
|
333 |
-
if new_path not in manifest.data_schema.leafs:
|
334 |
-
if not compute_dependencies:
|
335 |
-
raise ValueError(f'Embedding signal "{embedding_signal.key()}" is not computed over '
|
336 |
-
f'{source_path}. Please run `dataset.compute_signal` over '
|
337 |
-
f'{source_path} first.')
|
338 |
-
signals_to_compute.append((new_path, embedding_signal))
|
339 |
-
|
340 |
-
new_steps = len(signals_to_compute)
|
341 |
-
# Setup the task steps so the task progress indicator knows the number of steps before they are
|
342 |
-
# computed.
|
343 |
-
task_id: Optional[str] = None
|
344 |
-
step_id: Optional[int] = None
|
345 |
-
if task_step_id:
|
346 |
-
(task_id, step_id) = task_step_id
|
347 |
-
if task_id != '' and new_steps:
|
348 |
-
# Make a step for the parent.
|
349 |
-
set_worker_steps(task_id, [TaskStepInfo()] * (new_steps + 1))
|
350 |
-
|
351 |
-
for i, (new_path, signal) in enumerate(signals_to_compute):
|
352 |
-
if new_path not in manifest.data_schema.leafs:
|
353 |
-
self.compute_signal(
|
354 |
-
signal, source_path, task_step_id=(task_id, i) if task_id is not None else None)
|
355 |
-
|
356 |
-
if is_value_path:
|
357 |
-
new_path = (*new_path, VALUE_KEY)
|
358 |
-
|
359 |
-
new_task_id: Optional[TaskStepId] = None
|
360 |
-
if task_id is not None and step_id is not None:
|
361 |
-
new_task_id = (task_id, step_id + new_steps)
|
362 |
-
return (new_path, new_task_id)
|
363 |
|
364 |
@override
|
365 |
def compute_signal(self,
|
366 |
signal: Signal,
|
367 |
leaf_path: Path,
|
368 |
task_step_id: Optional[TaskStepId] = None) -> None:
|
|
|
|
|
369 |
source_path = normalize_path(leaf_path)
|
370 |
manifest = self.manifest()
|
371 |
|
@@ -373,10 +312,6 @@ class DatasetDuckDB(Dataset):
|
|
373 |
# Make a dummy task step so we report progress via tqdm.
|
374 |
task_step_id = ('', 0)
|
375 |
|
376 |
-
# Prepare the dependencies of this signal.
|
377 |
-
signal_source_path, task_step_id = self._prepare_signal(
|
378 |
-
signal, source_path, manifest, compute_dependencies=True, task_step_id=task_step_id)
|
379 |
-
|
380 |
# The manifest may have changed after computing the dependencies.
|
381 |
manifest = self.manifest()
|
382 |
|
@@ -392,9 +327,6 @@ class DatasetDuckDB(Dataset):
|
|
392 |
df = select_rows_result.df()
|
393 |
values = df['value']
|
394 |
|
395 |
-
source_path = signal_source_path
|
396 |
-
signal_col.path = source_path
|
397 |
-
|
398 |
enriched_path = _col_destination_path(signal_col, is_computed_signal=True)
|
399 |
spec = _split_path_into_subpaths_of_lists(enriched_path)
|
400 |
output_dir = os.path.join(self.dataset_path, _signal_dir(enriched_path))
|
@@ -403,20 +335,6 @@ class DatasetDuckDB(Dataset):
|
|
403 |
for uuid, item in zip(df[UUID_COLUMN], enriched_signal_items):
|
404 |
item[UUID_COLUMN] = uuid
|
405 |
|
406 |
-
is_embedding = isinstance(signal, TextEmbeddingSignal)
|
407 |
-
embedding_filename_prefix = None
|
408 |
-
if is_embedding:
|
409 |
-
embedding_filename_prefix = os.path.basename(
|
410 |
-
write_item_embeddings_to_disk(
|
411 |
-
keys=df[UUID_COLUMN],
|
412 |
-
embeddings=values,
|
413 |
-
output_dir=output_dir,
|
414 |
-
shard_index=0,
|
415 |
-
num_shards=1))
|
416 |
-
|
417 |
-
# Replace the embeddings with None so they are not serialized in the parquet file.
|
418 |
-
enriched_signal_items = (replace_embeddings_with_none(item) for item in enriched_signal_items)
|
419 |
-
|
420 |
enriched_signal_items = list(enriched_signal_items)
|
421 |
parquet_filename, _ = write_items_to_parquet(
|
422 |
items=enriched_signal_items,
|
@@ -431,12 +349,55 @@ class DatasetDuckDB(Dataset):
|
|
431 |
data_schema=signal_schema,
|
432 |
signal=signal,
|
433 |
enriched_path=source_path,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
parquet_id=make_parquet_id(signal, source_path, is_computed_signal=True),
|
435 |
embedding_filename_prefix=embedding_filename_prefix)
|
436 |
signal_manifest_filepath = os.path.join(output_dir, SIGNAL_MANIFEST_FILENAME)
|
|
|
437 |
with open_file(signal_manifest_filepath, 'w') as f:
|
438 |
f.write(signal_manifest.json(exclude_none=True, indent=2))
|
439 |
-
log(f'Wrote
|
440 |
|
441 |
@override
|
442 |
def delete_signal(self, signal_path: Path) -> None:
|
@@ -493,10 +454,9 @@ class DatasetDuckDB(Dataset):
|
|
493 |
|
494 |
# Signal transforms must have the same dtype as the leaf field.
|
495 |
signal = cast(Signal, col.signal_udf)
|
496 |
-
|
497 |
-
if not signal_compute_type_supports_dtype(compute_type, leaf.dtype):
|
498 |
raise ValueError(f'Leaf "{path}" has dtype "{leaf.dtype}" which is not supported '
|
499 |
-
f'by "{signal.key()}" with signal input type "{
|
500 |
|
501 |
def _validate_selection(self, columns: Sequence[Column], select_schema: Schema) -> None:
|
502 |
# Validate all the columns and make sure they exist in the `select_schema`.
|
@@ -728,8 +688,7 @@ class DatasetDuckDB(Dataset):
|
|
728 |
if not udf_cols_to_sort_by:
|
729 |
return None
|
730 |
udf_col = udf_cols_to_sort_by[0]
|
731 |
-
if udf_col.signal_udf and (udf_col.signal_udf
|
732 |
-
not in [SignalInputType.TEXT_EMBEDDING]):
|
733 |
return None
|
734 |
return udf_col
|
735 |
|
@@ -801,14 +760,6 @@ class DatasetDuckDB(Dataset):
|
|
801 |
if (UUID_COLUMN,) not in col_paths:
|
802 |
cols.append(column_from_identifier(UUID_COLUMN))
|
803 |
|
804 |
-
# Prepare UDF columns. Throw an error if they are not computed. Update the paths of the UDFs so
|
805 |
-
# they match the paths of the columns defined by splits and embeddings.
|
806 |
-
for col in cols:
|
807 |
-
if col.signal_udf:
|
808 |
-
# Do not auto-compute dependencies, throw an error if they are not computed.
|
809 |
-
col.path, _ = self._prepare_signal(
|
810 |
-
col.signal_udf, col.path, manifest, compute_dependencies=False)
|
811 |
-
|
812 |
schema = manifest.data_schema
|
813 |
|
814 |
if combine_columns:
|
@@ -825,9 +776,8 @@ class DatasetDuckDB(Dataset):
|
|
825 |
for udf_col in udf_columns:
|
826 |
if isinstance(udf_col.signal_udf, ConceptScoreSignal):
|
827 |
# Set dataset information on the signal.
|
828 |
-
source_path = udf_col.path if udf_col.path[-1] != EMBEDDING_KEY else udf_col.path[:-3]
|
829 |
udf_col.signal_udf.set_column_info(
|
830 |
-
ConceptColumnInfo(namespace=self.namespace, name=self.dataset_name, path=
|
831 |
|
832 |
if isinstance(udf_col.signal_udf, (ConceptScoreSignal, ConceptLabelsSignal)):
|
833 |
# Concept are access controlled so we tell it about the user.
|
@@ -863,19 +813,20 @@ class DatasetDuckDB(Dataset):
|
|
863 |
|
864 |
topk_udf_col = self._topk_udf_to_sort_by(udf_columns, sort_by, limit, sort_order)
|
865 |
if topk_udf_col:
|
866 |
-
|
867 |
if where_query:
|
868 |
# If there are filters, we need to send UUIDs to the top k query.
|
869 |
df = con.execute(f'SELECT {UUID_COLUMN} FROM t {where_query}').df()
|
870 |
total_num_rows = len(df)
|
871 |
-
|
|
|
872 |
|
873 |
-
topk_signal = cast(
|
874 |
# The input is an embedding.
|
875 |
-
|
876 |
k = (limit or 0) + (offset or 0)
|
877 |
-
topk = topk_signal.vector_compute_topk(k,
|
878 |
-
topk_uuids = list(dict.fromkeys([cast(str,
|
879 |
|
880 |
# Ignore all the other filters and filter DuckDB results only by the top k UUIDs.
|
881 |
uuid_filter = Filter(path=(UUID_COLUMN,), op=ListOp.IN, value=topk_uuids)
|
@@ -995,13 +946,12 @@ class DatasetDuckDB(Dataset):
|
|
995 |
with DebugTimer(f'Computing signal "{signal.signal_name}"'):
|
996 |
signal.setup()
|
997 |
|
998 |
-
if signal
|
999 |
-
|
1000 |
-
|
1001 |
-
vector_store = self.get_vector_store(embedding_signal.embedding, udf_col.path)
|
1002 |
flat_keys = list(flatten_keys(df[UUID_COLUMN], input))
|
1003 |
signal_out = sparse_to_dense_compute(
|
1004 |
-
iter(flat_keys), lambda keys:
|
1005 |
# Add progress.
|
1006 |
if task_step_id is not None:
|
1007 |
signal_out = progress(
|
@@ -1115,14 +1065,6 @@ class DatasetDuckDB(Dataset):
|
|
1115 |
if (UUID_COLUMN,) not in col_paths:
|
1116 |
cols.append(column_from_identifier(UUID_COLUMN))
|
1117 |
|
1118 |
-
# Prepare UDF columns. Throw an error if they are not computed. Update the paths of the UDFs so
|
1119 |
-
# they match the paths of the columns defined by splits and embeddings.
|
1120 |
-
for col in cols:
|
1121 |
-
if col.signal_udf:
|
1122 |
-
# Do not auto-compute dependencies, throw an error if they are not computed.
|
1123 |
-
col.path, _ = self._prepare_signal(
|
1124 |
-
col.signal_udf, col.path, manifest, compute_dependencies=False)
|
1125 |
-
|
1126 |
self._normalize_searches(searches, manifest)
|
1127 |
search_udfs = self._search_udfs(searches, manifest)
|
1128 |
cols.extend([search_udf.udf for search_udf in search_udfs])
|
@@ -1188,6 +1130,8 @@ class DatasetDuckDB(Dataset):
|
|
1188 |
select_leaf = select_leaf or column.signal_udf is not None
|
1189 |
|
1190 |
for m in parquet_manifests:
|
|
|
|
|
1191 |
# Skip this parquet file if it doesn't contain the path.
|
1192 |
if not schema_contains_path(m.data_schema, path):
|
1193 |
continue
|
@@ -1284,9 +1228,8 @@ class DatasetDuckDB(Dataset):
|
|
1284 |
if not embedding:
|
1285 |
raise ValueError(f'Please provide an embedding for semantic search. Got search: {search}')
|
1286 |
|
1287 |
-
embedding_path = (*search_path, embedding, PATH_WILDCARD, EMBEDDING_KEY)
|
1288 |
try:
|
1289 |
-
manifest.data_schema.get_field(
|
1290 |
except Exception as e:
|
1291 |
raise ValueError(
|
1292 |
f'Embedding {embedding} has not been computed. '
|
@@ -1314,7 +1257,7 @@ class DatasetDuckDB(Dataset):
|
|
1314 |
output_path=_col_destination_path(concept_labels_udf),
|
1315 |
sort=None))
|
1316 |
|
1317 |
-
udf = Column(path=
|
1318 |
|
1319 |
output_path = _col_destination_path(udf)
|
1320 |
search_udfs.append(
|
@@ -1373,7 +1316,7 @@ class DatasetDuckDB(Dataset):
|
|
1373 |
sql_op = BINARY_OP_TO_SQL[cast(BinaryOp, f.op)]
|
1374 |
filter_val = cast(FeatureValue, f.value)
|
1375 |
if isinstance(filter_val, str):
|
1376 |
-
filter_val =
|
1377 |
elif isinstance(filter_val, bytes):
|
1378 |
filter_val = _bytes_to_blob_literal(filter_val)
|
1379 |
else:
|
|
|
19 |
from ..auth import UserInfo
|
20 |
from ..concepts.concept import ConceptColumnInfo
|
21 |
from ..config import data_path, env
|
22 |
+
from ..embeddings.vector_store import VectorDBIndex, VectorStore
|
23 |
from ..embeddings.vector_store_numpy import NumpyVectorStore
|
24 |
from ..schema import (
|
25 |
MANIFEST_FILENAME,
|
|
|
33 |
Field,
|
34 |
Item,
|
35 |
Path,
|
36 |
+
PathKey,
|
37 |
PathTuple,
|
38 |
RichData,
|
39 |
Schema,
|
|
|
40 |
SourceManifest,
|
|
|
41 |
column_paths_match,
|
42 |
is_float,
|
43 |
is_integer,
|
44 |
is_ordinal,
|
45 |
is_temporal,
|
46 |
normalize_path,
|
47 |
+
signal_type_supports_dtype,
|
48 |
)
|
49 |
from ..signals.concept_labels import ConceptLabelsSignal
|
50 |
from ..signals.concept_scorer import ConceptScoreSignal
|
51 |
from ..signals.semantic_similarity import SemanticSimilaritySignal
|
52 |
from ..signals.signal import (
|
|
|
53 |
Signal,
|
|
|
54 |
TextEmbeddingSignal,
|
55 |
+
VectorSignal,
|
56 |
+
get_signal_by_type,
|
57 |
resolve_signal,
|
58 |
)
|
59 |
from ..signals.substring_search import SubstringSignal
|
60 |
+
from ..tasks import TaskStepId, progress
|
61 |
from ..utils import DebugTimer, get_dataset_output_dir, log, open_file
|
62 |
from . import dataset
|
63 |
from .dataset import (
|
|
|
94 |
flatten,
|
95 |
flatten_keys,
|
96 |
merge_schemas,
|
97 |
+
read_embeddings_from_disk,
|
|
|
98 |
schema_contains_path,
|
99 |
sparse_to_dense_compute,
|
100 |
unflatten,
|
101 |
wrap_in_dicts,
|
102 |
+
write_embeddings_to_disk,
|
103 |
write_items_to_parquet,
|
104 |
)
|
105 |
|
|
|
154 |
self._signal_manifests: list[SignalManifest] = []
|
155 |
self.con = duckdb.connect(database=':memory:')
|
156 |
|
157 |
+
# Maps a path and embedding to the vector index. This is lazily generated as needed.
|
158 |
+
self._vector_indices: dict[tuple[PathKey, str], VectorDBIndex] = {}
|
159 |
self.vector_store_cls = vector_store_cls
|
160 |
self._manifest_lock = threading.Lock()
|
161 |
|
|
|
194 |
signal_manifest = SignalManifest.parse_raw(f.read())
|
195 |
self._signal_manifests.append(signal_manifest)
|
196 |
signal_files = [os.path.join(root, f) for f in signal_manifest.files]
|
197 |
+
if signal_files:
|
198 |
+
self._create_view(signal_manifest.parquet_id, signal_files)
|
199 |
|
200 |
merged_schema = merge_schemas([self._source_manifest.data_schema] +
|
201 |
[m.data_schema for m in self._signal_manifests])
|
|
|
211 |
# NOTE: "root_column" for each signal is defined as the top-level column.
|
212 |
select_sql = ', '.join([f'{SOURCE_VIEW_NAME}.*'] + [(
|
213 |
f'{_escape_col_name(manifest.parquet_id)}.{_escape_col_name(_root_column(manifest))} '
|
214 |
+
f'AS {_escape_col_name(manifest.parquet_id)}')
|
215 |
+
for manifest in self._signal_manifests
|
216 |
+
if manifest.files])
|
217 |
join_sql = ' '.join([SOURCE_VIEW_NAME] + [
|
218 |
f'join {_escape_col_name(manifest.parquet_id)} using ({UUID_COLUMN},)'
|
219 |
for manifest in self._signal_manifests
|
220 |
+
if manifest.files
|
221 |
])
|
222 |
view_or_table = 'TABLE'
|
223 |
use_views = env('DUCKDB_USE_VIEWS', 0) or 0
|
|
|
269 |
raise NotImplementedError('count is not yet implemented for DuckDB.')
|
270 |
|
271 |
@override
|
272 |
+
def get_vector_db_index(self, embedding: str, path: PathTuple) -> VectorDBIndex:
|
273 |
# Refresh the manifest to make sure we have the latest signal manifests.
|
274 |
self.manifest()
|
275 |
+
index_key = (path, embedding)
|
276 |
+
if index_key in self._vector_indices:
|
277 |
+
return self._vector_indices[index_key]
|
278 |
|
279 |
+
manifests = [
|
280 |
+
m for m in self._signal_manifests
|
281 |
+
if schema_contains_path(m.data_schema, path) and m.embedding_filename_prefix
|
282 |
+
]
|
283 |
+
if not manifests:
|
284 |
+
raise ValueError(f'No embedding found for path {path}.')
|
285 |
+
if len(manifests) > 1:
|
286 |
+
raise ValueError(f'Multiple embeddings found for path {path}. Got: {manifests}')
|
287 |
+
manifest = manifests[0]
|
288 |
+
if not manifest.embedding_filename_prefix:
|
289 |
+
raise ValueError(f'Signal manifest for path {path} is not an embedding. '
|
290 |
+
f'Got signal manifest: {manifest}')
|
291 |
+
|
292 |
+
signal_name = cast(str, manifest.signal.signal_name)
|
293 |
+
filepath_prefix = os.path.join(self.dataset_path, _signal_dir(manifest.enriched_path),
|
294 |
+
signal_name, manifest.embedding_filename_prefix)
|
295 |
+
spans, embeddings = read_embeddings_from_disk(filepath_prefix)
|
296 |
+
vector_index = VectorDBIndex(self.vector_store_cls, spans, embeddings)
|
297 |
+
# Cache the vector index.
|
298 |
+
self._vector_indices[index_key] = vector_index
|
299 |
+
return vector_index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
|
301 |
@override
|
302 |
def compute_signal(self,
|
303 |
signal: Signal,
|
304 |
leaf_path: Path,
|
305 |
task_step_id: Optional[TaskStepId] = None) -> None:
|
306 |
+
if isinstance(signal, TextEmbeddingSignal):
|
307 |
+
return self.compute_embedding(signal.name, leaf_path, task_step_id)
|
308 |
source_path = normalize_path(leaf_path)
|
309 |
manifest = self.manifest()
|
310 |
|
|
|
312 |
# Make a dummy task step so we report progress via tqdm.
|
313 |
task_step_id = ('', 0)
|
314 |
|
|
|
|
|
|
|
|
|
315 |
# The manifest may have changed after computing the dependencies.
|
316 |
manifest = self.manifest()
|
317 |
|
|
|
327 |
df = select_rows_result.df()
|
328 |
values = df['value']
|
329 |
|
|
|
|
|
|
|
330 |
enriched_path = _col_destination_path(signal_col, is_computed_signal=True)
|
331 |
spec = _split_path_into_subpaths_of_lists(enriched_path)
|
332 |
output_dir = os.path.join(self.dataset_path, _signal_dir(enriched_path))
|
|
|
335 |
for uuid, item in zip(df[UUID_COLUMN], enriched_signal_items):
|
336 |
item[UUID_COLUMN] = uuid
|
337 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
enriched_signal_items = list(enriched_signal_items)
|
339 |
parquet_filename, _ = write_items_to_parquet(
|
340 |
items=enriched_signal_items,
|
|
|
349 |
data_schema=signal_schema,
|
350 |
signal=signal,
|
351 |
enriched_path=source_path,
|
352 |
+
parquet_id=make_parquet_id(signal, source_path, is_computed_signal=True))
|
353 |
+
signal_manifest_filepath = os.path.join(output_dir, SIGNAL_MANIFEST_FILENAME)
|
354 |
+
with open_file(signal_manifest_filepath, 'w') as f:
|
355 |
+
f.write(signal_manifest.json(exclude_none=True, indent=2))
|
356 |
+
log(f'Wrote signal output to {output_dir}')
|
357 |
+
|
358 |
+
@override
|
359 |
+
def compute_embedding(self,
|
360 |
+
embedding: str,
|
361 |
+
leaf_path: Path,
|
362 |
+
task_step_id: Optional[TaskStepId] = None) -> None:
|
363 |
+
source_path = normalize_path(leaf_path)
|
364 |
+
manifest = self.manifest()
|
365 |
+
|
366 |
+
if task_step_id is None:
|
367 |
+
# Make a dummy task step so we report progress via tqdm.
|
368 |
+
task_step_id = ('', 0)
|
369 |
+
|
370 |
+
signal = get_signal_by_type(embedding, TextEmbeddingSignal)()
|
371 |
+
signal_col = Column(path=source_path, alias='value', signal_udf=signal)
|
372 |
+
select_rows_result = self.select_rows([signal_col],
|
373 |
+
task_step_id=task_step_id,
|
374 |
+
resolve_span=True)
|
375 |
+
df = select_rows_result.df()
|
376 |
+
values = df['value']
|
377 |
+
|
378 |
+
enriched_path = _col_destination_path(signal_col, is_computed_signal=True)
|
379 |
+
output_dir = os.path.join(self.dataset_path, _signal_dir(enriched_path))
|
380 |
+
signal_schema = create_signal_schema(signal, source_path, manifest.data_schema)
|
381 |
+
embedding_filename_prefix = os.path.basename(
|
382 |
+
write_embeddings_to_disk(
|
383 |
+
uuids=df[UUID_COLUMN],
|
384 |
+
signal_items=values,
|
385 |
+
output_dir=output_dir,
|
386 |
+
shard_index=0,
|
387 |
+
num_shards=1))
|
388 |
+
|
389 |
+
signal_manifest = SignalManifest(
|
390 |
+
files=[],
|
391 |
+
data_schema=signal_schema,
|
392 |
+
signal=signal,
|
393 |
+
enriched_path=source_path,
|
394 |
parquet_id=make_parquet_id(signal, source_path, is_computed_signal=True),
|
395 |
embedding_filename_prefix=embedding_filename_prefix)
|
396 |
signal_manifest_filepath = os.path.join(output_dir, SIGNAL_MANIFEST_FILENAME)
|
397 |
+
|
398 |
with open_file(signal_manifest_filepath, 'w') as f:
|
399 |
f.write(signal_manifest.json(exclude_none=True, indent=2))
|
400 |
+
log(f'Wrote embedding index to {output_dir}')
|
401 |
|
402 |
@override
|
403 |
def delete_signal(self, signal_path: Path) -> None:
|
|
|
454 |
|
455 |
# Signal transforms must have the same dtype as the leaf field.
|
456 |
signal = cast(Signal, col.signal_udf)
|
457 |
+
if not signal_type_supports_dtype(signal.input_type, leaf.dtype):
|
|
|
458 |
raise ValueError(f'Leaf "{path}" has dtype "{leaf.dtype}" which is not supported '
|
459 |
+
f'by "{signal.key()}" with signal input type "{signal.input_type}".')
|
460 |
|
461 |
def _validate_selection(self, columns: Sequence[Column], select_schema: Schema) -> None:
|
462 |
# Validate all the columns and make sure they exist in the `select_schema`.
|
|
|
688 |
if not udf_cols_to_sort_by:
|
689 |
return None
|
690 |
udf_col = udf_cols_to_sort_by[0]
|
691 |
+
if udf_col.signal_udf and not isinstance(udf_col.signal_udf, VectorSignal):
|
|
|
692 |
return None
|
693 |
return udf_col
|
694 |
|
|
|
760 |
if (UUID_COLUMN,) not in col_paths:
|
761 |
cols.append(column_from_identifier(UUID_COLUMN))
|
762 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
763 |
schema = manifest.data_schema
|
764 |
|
765 |
if combine_columns:
|
|
|
776 |
for udf_col in udf_columns:
|
777 |
if isinstance(udf_col.signal_udf, ConceptScoreSignal):
|
778 |
# Set dataset information on the signal.
|
|
|
779 |
udf_col.signal_udf.set_column_info(
|
780 |
+
ConceptColumnInfo(namespace=self.namespace, name=self.dataset_name, path=udf_col.path))
|
781 |
|
782 |
if isinstance(udf_col.signal_udf, (ConceptScoreSignal, ConceptLabelsSignal)):
|
783 |
# Concept are access controlled so we tell it about the user.
|
|
|
813 |
|
814 |
topk_udf_col = self._topk_udf_to_sort_by(udf_columns, sort_by, limit, sort_order)
|
815 |
if topk_udf_col:
|
816 |
+
path_keys: Optional[Iterable[PathKey]] = None
|
817 |
if where_query:
|
818 |
# If there are filters, we need to send UUIDs to the top k query.
|
819 |
df = con.execute(f'SELECT {UUID_COLUMN} FROM t {where_query}').df()
|
820 |
total_num_rows = len(df)
|
821 |
+
# Convert UUIDs to path keys.
|
822 |
+
path_keys = [(uuid,) for uuid in df[UUID_COLUMN]]
|
823 |
|
824 |
+
topk_signal = cast(VectorSignal, topk_udf_col.signal_udf)
|
825 |
# The input is an embedding.
|
826 |
+
vector_index = self.get_vector_db_index(topk_signal.embedding, topk_udf_col.path)
|
827 |
k = (limit or 0) + (offset or 0)
|
828 |
+
topk = topk_signal.vector_compute_topk(k, vector_index, path_keys)
|
829 |
+
topk_uuids = list(dict.fromkeys([cast(str, path_key[0]) for path_key, _ in topk]))
|
830 |
|
831 |
# Ignore all the other filters and filter DuckDB results only by the top k UUIDs.
|
832 |
uuid_filter = Filter(path=(UUID_COLUMN,), op=ListOp.IN, value=topk_uuids)
|
|
|
946 |
with DebugTimer(f'Computing signal "{signal.signal_name}"'):
|
947 |
signal.setup()
|
948 |
|
949 |
+
if isinstance(signal, VectorSignal):
|
950 |
+
embedding_signal = signal
|
951 |
+
vector_store = self.get_vector_db_index(embedding_signal.embedding, udf_col.path)
|
|
|
952 |
flat_keys = list(flatten_keys(df[UUID_COLUMN], input))
|
953 |
signal_out = sparse_to_dense_compute(
|
954 |
+
iter(flat_keys), lambda keys: embedding_signal.vector_compute(keys, vector_store))
|
955 |
# Add progress.
|
956 |
if task_step_id is not None:
|
957 |
signal_out = progress(
|
|
|
1065 |
if (UUID_COLUMN,) not in col_paths:
|
1066 |
cols.append(column_from_identifier(UUID_COLUMN))
|
1067 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1068 |
self._normalize_searches(searches, manifest)
|
1069 |
search_udfs = self._search_udfs(searches, manifest)
|
1070 |
cols.extend([search_udf.udf for search_udf in search_udfs])
|
|
|
1130 |
select_leaf = select_leaf or column.signal_udf is not None
|
1131 |
|
1132 |
for m in parquet_manifests:
|
1133 |
+
if not m.files:
|
1134 |
+
continue
|
1135 |
# Skip this parquet file if it doesn't contain the path.
|
1136 |
if not schema_contains_path(m.data_schema, path):
|
1137 |
continue
|
|
|
1228 |
if not embedding:
|
1229 |
raise ValueError(f'Please provide an embedding for semantic search. Got search: {search}')
|
1230 |
|
|
|
1231 |
try:
|
1232 |
+
manifest.data_schema.get_field((*search_path, embedding))
|
1233 |
except Exception as e:
|
1234 |
raise ValueError(
|
1235 |
f'Embedding {embedding} has not been computed. '
|
|
|
1257 |
output_path=_col_destination_path(concept_labels_udf),
|
1258 |
sort=None))
|
1259 |
|
1260 |
+
udf = Column(path=search_path, signal_udf=search_signal)
|
1261 |
|
1262 |
output_path = _col_destination_path(udf)
|
1263 |
search_udfs.append(
|
|
|
1316 |
sql_op = BINARY_OP_TO_SQL[cast(BinaryOp, f.op)]
|
1317 |
filter_val = cast(FeatureValue, f.value)
|
1318 |
if isinstance(filter_val, str):
|
1319 |
+
filter_val = _escape_string_literal(filter_val)
|
1320 |
elif isinstance(filter_val, bytes):
|
1321 |
filter_val = _bytes_to_blob_literal(filter_val)
|
1322 |
else:
|
lilac/data/dataset_test_utils.py
CHANGED
@@ -4,8 +4,10 @@ import pathlib
|
|
4 |
from datetime import datetime
|
5 |
from typing import Optional, Type, cast
|
6 |
|
|
|
7 |
from typing_extensions import Protocol
|
8 |
|
|
|
9 |
from ..schema import (
|
10 |
MANIFEST_FILENAME,
|
11 |
PARQUET_FILENAME_PREFIX,
|
@@ -13,14 +15,13 @@ from ..schema import (
|
|
13 |
DataType,
|
14 |
Field,
|
15 |
Item,
|
|
|
16 |
Schema,
|
17 |
SourceManifest,
|
18 |
-
field,
|
19 |
)
|
20 |
-
from ..signals.signal import EMBEDDING_KEY
|
21 |
from ..utils import get_dataset_output_dir, open_file
|
22 |
from .dataset import Dataset
|
23 |
-
from .dataset_utils import is_primitive,
|
24 |
|
25 |
TEST_NAMESPACE = 'test_namespace'
|
26 |
TEST_DATASET_NAME = 'test_dataset'
|
@@ -109,11 +110,16 @@ def enriched_item(value: Optional[Item] = None, metadata: dict[str, Item] = {})
|
|
109 |
return {VALUE_KEY: value, **metadata}
|
110 |
|
111 |
|
112 |
-
def
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
-
|
117 |
-
def enriched_embedding_span_field(metadata: Optional[object] = {}) -> Field:
|
118 |
-
"""Makes a field that represents an embedding span that was enriched with metadata."""
|
119 |
-
return field('string_span', fields={EMBEDDING_KEY: field('embedding', fields=metadata)})
|
|
|
4 |
from datetime import datetime
|
5 |
from typing import Optional, Type, cast
|
6 |
|
7 |
+
import numpy as np
|
8 |
from typing_extensions import Protocol
|
9 |
|
10 |
+
from ..embeddings.vector_store import VectorDBIndex, VectorStore
|
11 |
from ..schema import (
|
12 |
MANIFEST_FILENAME,
|
13 |
PARQUET_FILENAME_PREFIX,
|
|
|
15 |
DataType,
|
16 |
Field,
|
17 |
Item,
|
18 |
+
PathKey,
|
19 |
Schema,
|
20 |
SourceManifest,
|
|
|
21 |
)
|
|
|
22 |
from ..utils import get_dataset_output_dir, open_file
|
23 |
from .dataset import Dataset
|
24 |
+
from .dataset_utils import is_primitive, write_items_to_parquet
|
25 |
|
26 |
TEST_NAMESPACE = 'test_namespace'
|
27 |
TEST_DATASET_NAME = 'test_dataset'
|
|
|
110 |
return {VALUE_KEY: value, **metadata}
|
111 |
|
112 |
|
113 |
+
def make_vector_index(vector_store_cls: Type[VectorStore],
|
114 |
+
vector_dict: dict[PathKey, list[list[float]]]) -> VectorDBIndex:
|
115 |
+
"""Make a vector index from a dictionary of vector keys to vectors."""
|
116 |
+
embeddings: list[np.ndarray] = []
|
117 |
+
spans: list[tuple[PathKey, list[tuple[int, int]]]] = []
|
118 |
+
for path_key, vectors in vector_dict.items():
|
119 |
+
vector_spans: list[tuple[int, int]] = []
|
120 |
+
for i, vector in enumerate(vectors):
|
121 |
+
embeddings.append(np.array(vector))
|
122 |
+
vector_spans.append((0, 0))
|
123 |
+
spans.append((path_key, vector_spans))
|
124 |
|
125 |
+
return VectorDBIndex(vector_store_cls, spans, np.array(embeddings))
|
|
|
|
|
|
lilac/data/dataset_utils.py
CHANGED
@@ -22,6 +22,7 @@ from ..schema import (
|
|
22 |
VALUE_KEY,
|
23 |
Field,
|
24 |
Item,
|
|
|
25 |
PathTuple,
|
26 |
Schema,
|
27 |
VectorKey,
|
@@ -33,6 +34,7 @@ from ..signals.signal import EMBEDDING_KEY, Signal
|
|
33 |
from ..utils import file_exists, log, open_file
|
34 |
|
35 |
_KEYS_SUFFIX = '.keys.pkl'
|
|
|
36 |
_EMBEDDINGS_SUFFIX = '.npy'
|
37 |
|
38 |
|
@@ -218,53 +220,58 @@ def create_signal_schema(signal: Signal, source_path: PathTuple, current_schema:
|
|
218 |
return schema({UUID_COLUMN: 'string', **cast(dict, enriched_schema.fields)})
|
219 |
|
220 |
|
221 |
-
def
|
222 |
-
|
223 |
"""Write a set of embeddings to disk."""
|
224 |
output_path_prefix = embedding_index_filename_prefix(output_dir, shard_index, num_shards)
|
225 |
|
226 |
-
#
|
227 |
def embedding_predicate(input: Any) -> bool:
|
228 |
-
return isinstance(input,
|
|
|
229 |
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
|
234 |
embedding_vectors: list[np.ndarray] = []
|
235 |
-
|
236 |
-
for
|
237 |
-
if not
|
238 |
# Sparse embeddings may not have an embedding for every key.
|
239 |
continue
|
240 |
|
241 |
-
|
242 |
-
|
243 |
-
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
embedding_matrix = np.array(embedding_vectors)
|
246 |
-
|
247 |
# Write the embedding index and the ordered UUID column to disk so they can be joined later.
|
248 |
|
249 |
with open_file(output_path_prefix + _EMBEDDINGS_SUFFIX, 'wb') as f:
|
250 |
np.save(cast(str, f), embedding_matrix, allow_pickle=False)
|
251 |
-
with open_file(output_path_prefix +
|
252 |
-
pickle.dump(
|
253 |
|
254 |
return output_path_prefix
|
255 |
|
256 |
|
257 |
-
def
|
258 |
-
|
|
|
259 |
if not file_exists(filepath_prefix + _EMBEDDINGS_SUFFIX):
|
260 |
raise ValueError(F'Embedding index does not exist at path {filepath_prefix}. '
|
261 |
'Please run dataset.compute_signal() on the embedding signal first.')
|
262 |
-
|
263 |
# Read the embedding index from disk.
|
264 |
embeddings = np.load(filepath_prefix + _EMBEDDINGS_SUFFIX, allow_pickle=False)
|
265 |
-
with open_file(filepath_prefix +
|
266 |
-
|
267 |
-
return
|
268 |
|
269 |
|
270 |
def write_items_to_parquet(items: Iterable[Item], output_dir: str, schema: Schema,
|
|
|
22 |
VALUE_KEY,
|
23 |
Field,
|
24 |
Item,
|
25 |
+
PathKey,
|
26 |
PathTuple,
|
27 |
Schema,
|
28 |
VectorKey,
|
|
|
34 |
from ..utils import file_exists, log, open_file
|
35 |
|
36 |
_KEYS_SUFFIX = '.keys.pkl'
|
37 |
+
_SPANS_SUFFIX = '.spans.pkl'
|
38 |
_EMBEDDINGS_SUFFIX = '.npy'
|
39 |
|
40 |
|
|
|
220 |
return schema({UUID_COLUMN: 'string', **cast(dict, enriched_schema.fields)})
|
221 |
|
222 |
|
223 |
+
def write_embeddings_to_disk(uuids: Iterable[str], signal_items: Iterable[Item], output_dir: str,
|
224 |
+
shard_index: int, num_shards: int) -> str:
|
225 |
"""Write a set of embeddings to disk."""
|
226 |
output_path_prefix = embedding_index_filename_prefix(output_dir, shard_index, num_shards)
|
227 |
|
228 |
+
# For each item, we have a list of embedding spans.
|
229 |
def embedding_predicate(input: Any) -> bool:
|
230 |
+
return (isinstance(input, list) and len(input) > 0 and isinstance(input[0], dict) and
|
231 |
+
EMBEDDING_KEY in input[0])
|
232 |
|
233 |
+
path_keys = flatten_keys(uuids, signal_items, is_primitive_predicate=embedding_predicate)
|
234 |
+
all_embeddings = cast(Iterable[Item],
|
235 |
+
flatten(signal_items, is_primitive_predicate=embedding_predicate))
|
236 |
|
237 |
embedding_vectors: list[np.ndarray] = []
|
238 |
+
all_spans: list[tuple[PathKey, list[tuple[int, int]]]] = []
|
239 |
+
for path_key, embeddings in zip(path_keys, all_embeddings):
|
240 |
+
if not path_key or not embeddings:
|
241 |
# Sparse embeddings may not have an embedding for every key.
|
242 |
continue
|
243 |
|
244 |
+
spans: list[tuple[int, int]] = []
|
245 |
+
for e in embeddings:
|
246 |
+
span = e[VALUE_KEY]
|
247 |
+
vector = e[EMBEDDING_KEY]
|
248 |
+
# We squeeze here because embedding functions can return outer dimensions of 1.
|
249 |
+
embedding_vectors.append(vector.reshape(-1))
|
250 |
+
spans.append((span[TEXT_SPAN_START_FEATURE], span[TEXT_SPAN_END_FEATURE]))
|
251 |
+
all_spans.append((path_key, spans))
|
252 |
|
253 |
embedding_matrix = np.array(embedding_vectors)
|
|
|
254 |
# Write the embedding index and the ordered UUID column to disk so they can be joined later.
|
255 |
|
256 |
with open_file(output_path_prefix + _EMBEDDINGS_SUFFIX, 'wb') as f:
|
257 |
np.save(cast(str, f), embedding_matrix, allow_pickle=False)
|
258 |
+
with open_file(output_path_prefix + _SPANS_SUFFIX, 'wb') as f:
|
259 |
+
pickle.dump(all_spans, f)
|
260 |
|
261 |
return output_path_prefix
|
262 |
|
263 |
|
264 |
+
def read_embeddings_from_disk(
|
265 |
+
filepath_prefix: str) -> tuple[list[tuple[PathKey, list[tuple[int, int]]]], np.ndarray]:
|
266 |
+
"""Reads the embeddings from disk."""
|
267 |
if not file_exists(filepath_prefix + _EMBEDDINGS_SUFFIX):
|
268 |
raise ValueError(F'Embedding index does not exist at path {filepath_prefix}. '
|
269 |
'Please run dataset.compute_signal() on the embedding signal first.')
|
|
|
270 |
# Read the embedding index from disk.
|
271 |
embeddings = np.load(filepath_prefix + _EMBEDDINGS_SUFFIX, allow_pickle=False)
|
272 |
+
with open_file(filepath_prefix + _SPANS_SUFFIX, 'rb') as f:
|
273 |
+
spans: list[tuple[PathKey, list[tuple[int, int]]]] = pickle.load(f)
|
274 |
+
return spans, embeddings
|
275 |
|
276 |
|
277 |
def write_items_to_parquet(items: Iterable[Item], output_dir: str, schema: Schema,
|
lilac/embeddings/vector_store.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
"""Interface for storing vectors."""
|
2 |
|
3 |
import abc
|
4 |
-
from typing import Iterable, Optional
|
5 |
|
6 |
import numpy as np
|
|
|
7 |
|
8 |
from ..schema import VectorKey
|
9 |
|
@@ -29,7 +30,7 @@ class VectorStore(abc.ABC):
|
|
29 |
pass
|
30 |
|
31 |
@abc.abstractmethod
|
32 |
-
def get(self, keys: Iterable[VectorKey]) -> np.ndarray:
|
33 |
"""Return the embeddings for given keys.
|
34 |
|
35 |
Args:
|
@@ -43,15 +44,95 @@ class VectorStore(abc.ABC):
|
|
43 |
def topk(self,
|
44 |
query: np.ndarray,
|
45 |
k: int,
|
46 |
-
|
47 |
"""Return the top k most similar vectors.
|
48 |
|
49 |
Args:
|
50 |
query: The query vector.
|
51 |
k: The number of results to return.
|
52 |
-
|
53 |
|
54 |
Returns
|
55 |
A list of (key, score) tuples.
|
56 |
"""
|
57 |
raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""Interface for storing vectors."""
|
2 |
|
3 |
import abc
|
4 |
+
from typing import Iterable, Optional, Type
|
5 |
|
6 |
import numpy as np
|
7 |
+
from typing_extensions import TypedDict
|
8 |
|
9 |
from ..schema import VectorKey
|
10 |
|
|
|
30 |
pass
|
31 |
|
32 |
@abc.abstractmethod
|
33 |
+
def get(self, keys: Optional[Iterable[VectorKey]] = None) -> np.ndarray:
|
34 |
"""Return the embeddings for given keys.
|
35 |
|
36 |
Args:
|
|
|
44 |
def topk(self,
|
45 |
query: np.ndarray,
|
46 |
k: int,
|
47 |
+
keys: Optional[Iterable[VectorKey]] = None) -> list[tuple[VectorKey, float]]:
|
48 |
"""Return the top k most similar vectors.
|
49 |
|
50 |
Args:
|
51 |
query: The query vector.
|
52 |
k: The number of results to return.
|
53 |
+
keys: Optional keys to restrict the search to.
|
54 |
|
55 |
Returns
|
56 |
A list of (key, score) tuples.
|
57 |
"""
|
58 |
raise NotImplementedError
|
59 |
+
|
60 |
+
|
61 |
+
class SpanVector(TypedDict):
|
62 |
+
"""A span with a vector."""
|
63 |
+
span: tuple[int, int]
|
64 |
+
vector: np.ndarray
|
65 |
+
|
66 |
+
|
67 |
+
PathKey = VectorKey
|
68 |
+
|
69 |
+
|
70 |
+
class VectorDBIndex:
|
71 |
+
"""Stores and retrives span vectors.
|
72 |
+
|
73 |
+
This wraps a regular vector store by adding a mapping from path keys, such as (uuid1, 0),
|
74 |
+
to span keys, such as (uuid1, 0, 0), which denotes the first span in the (uuid1, 0) text document.
|
75 |
+
"""
|
76 |
+
|
77 |
+
def __init__(self, vector_store_cls: Type[VectorStore],
|
78 |
+
spans: list[tuple[PathKey, list[tuple[int, int]]]], embeddings: np.ndarray) -> None:
|
79 |
+
vector_keys = [(*path_key, i) for path_key, spans in spans for i in range(len(spans))]
|
80 |
+
self._vector_store = vector_store_cls()
|
81 |
+
self._vector_store.add(vector_keys, embeddings)
|
82 |
+
# Map a path key to spans for that path.
|
83 |
+
self._id_to_spans: dict[PathKey, list[tuple[int, int]]] = {}
|
84 |
+
self._id_to_spans.update(spans)
|
85 |
+
|
86 |
+
def get_vector_store(self) -> VectorStore:
|
87 |
+
"""Return the vector store."""
|
88 |
+
return self._vector_store
|
89 |
+
|
90 |
+
def get(self, keys: Iterable[PathKey]) -> Iterable[list[SpanVector]]:
|
91 |
+
"""Return the spans with vectors for each key in `keys`.
|
92 |
+
|
93 |
+
Args:
|
94 |
+
keys: The keys to return the vectors for.
|
95 |
+
|
96 |
+
Returns
|
97 |
+
The span vectors for the given keys.
|
98 |
+
"""
|
99 |
+
all_spans: list[list[tuple[int, int]]] = []
|
100 |
+
vector_keys: list[VectorKey] = []
|
101 |
+
for path_key in keys:
|
102 |
+
spans = self._id_to_spans[path_key]
|
103 |
+
all_spans.append(spans)
|
104 |
+
vector_keys.extend([(*path_key, i) for i in range(len(spans))])
|
105 |
+
|
106 |
+
all_vectors = self._vector_store.get(vector_keys)
|
107 |
+
offset = 0
|
108 |
+
for spans in all_spans:
|
109 |
+
vectors = all_vectors[offset:offset + len(spans)]
|
110 |
+
yield [{'span': span, 'vector': vector} for span, vector in zip(spans, vectors)]
|
111 |
+
offset += len(spans)
|
112 |
+
|
113 |
+
def topk(self,
|
114 |
+
query: np.ndarray,
|
115 |
+
k: int,
|
116 |
+
path_keys: Optional[Iterable[PathKey]] = None) -> list[tuple[PathKey, float]]:
|
117 |
+
"""Return the top k most similar vectors.
|
118 |
+
|
119 |
+
Args:
|
120 |
+
query: The query vector.
|
121 |
+
k: The number of results to return.
|
122 |
+
path_keys: Optional key prefixes to restrict the search to.
|
123 |
+
|
124 |
+
Returns
|
125 |
+
A list of (key, score) tuples.
|
126 |
+
"""
|
127 |
+
vector_keys: Optional[list[VectorKey]] = None
|
128 |
+
if path_keys:
|
129 |
+
vector_keys = [
|
130 |
+
(*path_key, i) for path_key in path_keys for i in range(len(self._id_to_spans[path_key]))
|
131 |
+
]
|
132 |
+
vector_key_scores = self._vector_store.topk(query, k, vector_keys)
|
133 |
+
path_key_scores: dict[PathKey, float] = {}
|
134 |
+
for (*path_key_list, _), score in vector_key_scores:
|
135 |
+
path_key = tuple(path_key_list)
|
136 |
+
if path_key not in path_key_scores:
|
137 |
+
path_key_scores[path_key] = score
|
138 |
+
return list(path_key_scores.items())
|
lilac/embeddings/vector_store_numpy.py
CHANGED
@@ -34,13 +34,11 @@ class NumpyVectorStore(VectorStore):
|
|
34 |
# Cast to float32 since dot product with float32 is 40-50x faster than float16 and 2.5x faster
|
35 |
# than float64.
|
36 |
self._embeddings = embeddings.astype(np.float32)
|
37 |
-
|
38 |
-
index = pd.MultiIndex.from_tuples(keys)
|
39 |
row_indices = np.arange(len(self._embeddings), dtype=np.uint32)
|
40 |
-
self._lookup = pd.Series(row_indices, index=
|
41 |
|
42 |
@override
|
43 |
-
def get(self, keys: Iterable[VectorKey]) -> np.ndarray:
|
44 |
"""Return the embeddings for given keys.
|
45 |
|
46 |
Args:
|
@@ -49,6 +47,8 @@ class NumpyVectorStore(VectorStore):
|
|
49 |
Returns
|
50 |
The embeddings for the given keys.
|
51 |
"""
|
|
|
|
|
52 |
locs = self._lookup.loc[cast(list[str], keys)]
|
53 |
return self._embeddings.take(locs, axis=0)
|
54 |
|
@@ -56,14 +56,10 @@ class NumpyVectorStore(VectorStore):
|
|
56 |
def topk(self,
|
57 |
query: np.ndarray,
|
58 |
k: int,
|
59 |
-
|
60 |
-
if
|
61 |
-
# Cast tuples of length 1 to the element itself to avoid a pandas bug.
|
62 |
-
key_prefixes = cast(
|
63 |
-
list[VectorKey],
|
64 |
-
[k[0] if isinstance(k, tuple) and len(k) == 1 else k for k in key_prefixes])
|
65 |
# This uses the hierarchical index (MutliIndex) to do a prefix lookup.
|
66 |
-
row_indices = self._lookup.loc[cast(list[str],
|
67 |
keys, embeddings = list(row_indices.index), self._embeddings.take(row_indices, axis=0)
|
68 |
else:
|
69 |
keys, embeddings = self._keys, self._embeddings
|
|
|
34 |
# Cast to float32 since dot product with float32 is 40-50x faster than float16 and 2.5x faster
|
35 |
# than float64.
|
36 |
self._embeddings = embeddings.astype(np.float32)
|
|
|
|
|
37 |
row_indices = np.arange(len(self._embeddings), dtype=np.uint32)
|
38 |
+
self._lookup = pd.Series(row_indices, index=keys)
|
39 |
|
40 |
@override
|
41 |
+
def get(self, keys: Optional[Iterable[VectorKey]] = None) -> np.ndarray:
|
42 |
"""Return the embeddings for given keys.
|
43 |
|
44 |
Args:
|
|
|
47 |
Returns
|
48 |
The embeddings for the given keys.
|
49 |
"""
|
50 |
+
if not keys:
|
51 |
+
return self._embeddings
|
52 |
locs = self._lookup.loc[cast(list[str], keys)]
|
53 |
return self._embeddings.take(locs, axis=0)
|
54 |
|
|
|
56 |
def topk(self,
|
57 |
query: np.ndarray,
|
58 |
k: int,
|
59 |
+
keys: Optional[Iterable[VectorKey]] = None) -> list[tuple[VectorKey, float]]:
|
60 |
+
if keys is not None:
|
|
|
|
|
|
|
|
|
61 |
# This uses the hierarchical index (MutliIndex) to do a prefix lookup.
|
62 |
+
row_indices = self._lookup.loc[cast(list[str], keys)]
|
63 |
keys, embeddings = list(row_indices.index), self._embeddings.take(row_indices, axis=0)
|
64 |
else:
|
65 |
keys, embeddings = self._keys, self._embeddings
|
lilac/router_concept.py
CHANGED
@@ -125,7 +125,7 @@ class ScoreBody(BaseModel):
|
|
125 |
|
126 |
class ScoreResponse(BaseModel):
|
127 |
"""Response body for the score endpoint."""
|
128 |
-
|
129 |
model_synced: bool
|
130 |
|
131 |
|
@@ -212,7 +212,7 @@ def score(namespace: str, concept_name: str, embedding_name: str, body: ScoreBod
|
|
212 |
model_updated = DISK_CONCEPT_MODEL_DB.sync(model, user)
|
213 |
# TODO(smilkov): Support images.
|
214 |
texts = [example.text or '' for example in body.examples]
|
215 |
-
return ScoreResponse(
|
216 |
|
217 |
|
218 |
class Examples(OpenAISchema):
|
|
|
125 |
|
126 |
class ScoreResponse(BaseModel):
|
127 |
"""Response body for the score endpoint."""
|
128 |
+
scored_spans: list[list[dict]]
|
129 |
model_synced: bool
|
130 |
|
131 |
|
|
|
212 |
model_updated = DISK_CONCEPT_MODEL_DB.sync(model, user)
|
213 |
# TODO(smilkov): Support images.
|
214 |
texts = [example.text or '' for example in body.examples]
|
215 |
+
return ScoreResponse(scored_spans=model.score(body.draft, texts), model_synced=model_updated)
|
216 |
|
217 |
|
218 |
class Examples(OpenAISchema):
|
lilac/router_dataset.py
CHANGED
@@ -29,13 +29,7 @@ from .schema import Bin, Path, normalize_path
|
|
29 |
from .signals.concept_labels import ConceptLabelsSignal
|
30 |
from .signals.concept_scorer import ConceptScoreSignal
|
31 |
from .signals.semantic_similarity import SemanticSimilaritySignal
|
32 |
-
from .signals.signal import
|
33 |
-
Signal,
|
34 |
-
TextEmbeddingModelSignal,
|
35 |
-
TextEmbeddingSignal,
|
36 |
-
TextSignal,
|
37 |
-
resolve_signal,
|
38 |
-
)
|
39 |
from .signals.substring_search import SubstringSignal
|
40 |
from .tasks import TaskId, task_manager
|
41 |
from .utils import DatasetInfo, list_datasets
|
@@ -176,8 +170,7 @@ class ListFilter(BaseModel):
|
|
176 |
Filter = Union[BinaryFilter, UnaryFilter, ListFilter]
|
177 |
|
178 |
AllSignalTypes = Union[ConceptScoreSignal, ConceptLabelsSignal, SubstringSignal,
|
179 |
-
SemanticSimilaritySignal,
|
180 |
-
TextSignal, Signal]
|
181 |
|
182 |
|
183 |
# We override the `Column` class so we can add explicitly all signal types for better OpenAPI spec.
|
|
|
29 |
from .signals.concept_labels import ConceptLabelsSignal
|
30 |
from .signals.concept_scorer import ConceptScoreSignal
|
31 |
from .signals.semantic_similarity import SemanticSimilaritySignal
|
32 |
+
from .signals.signal import Signal, TextEmbeddingSignal, TextSignal, resolve_signal
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
from .signals.substring_search import SubstringSignal
|
34 |
from .tasks import TaskId, task_manager
|
35 |
from .utils import DatasetInfo, list_datasets
|
|
|
170 |
Filter = Union[BinaryFilter, UnaryFilter, ListFilter]
|
171 |
|
172 |
AllSignalTypes = Union[ConceptScoreSignal, ConceptLabelsSignal, SubstringSignal,
|
173 |
+
SemanticSimilaritySignal, TextEmbeddingSignal, TextSignal, Signal]
|
|
|
174 |
|
175 |
|
176 |
# We override the `Column` class so we can add explicitly all signal types for better OpenAPI spec.
|
lilac/schema.py
CHANGED
@@ -40,6 +40,7 @@ PathKeyedItem = tuple[Path, Item]
|
|
40 |
# These fields are for for python only and not written to a schema.
|
41 |
RichData = Union[str, bytes]
|
42 |
VectorKey = tuple[Union[StrictStr, StrictInt], ...]
|
|
|
43 |
|
44 |
|
45 |
class DataType(str, Enum):
|
@@ -94,16 +95,15 @@ class SignalInputType(str, Enum):
|
|
94 |
return self.value
|
95 |
|
96 |
|
97 |
-
|
98 |
SignalInputType.TEXT: [DataType.STRING, DataType.STRING_SPAN],
|
99 |
-
SignalInputType.TEXT_EMBEDDING: [DataType.EMBEDDING],
|
100 |
SignalInputType.IMAGE: [DataType.BINARY],
|
101 |
}
|
102 |
|
103 |
|
104 |
-
def
|
105 |
"""Returns True if the signal compute type supports the dtype."""
|
106 |
-
return dtype in
|
107 |
|
108 |
|
109 |
Bin = tuple[str, Optional[Union[float, int]], Optional[Union[float, int]]]
|
|
|
40 |
# These fields are for for python only and not written to a schema.
|
41 |
RichData = Union[str, bytes]
|
42 |
VectorKey = tuple[Union[StrictStr, StrictInt], ...]
|
43 |
+
PathKey = VectorKey
|
44 |
|
45 |
|
46 |
class DataType(str, Enum):
|
|
|
95 |
return self.value
|
96 |
|
97 |
|
98 |
+
SIGNAL_TYPE_TO_VALID_DTYPES: dict[SignalInputType, list[DataType]] = {
|
99 |
SignalInputType.TEXT: [DataType.STRING, DataType.STRING_SPAN],
|
|
|
100 |
SignalInputType.IMAGE: [DataType.BINARY],
|
101 |
}
|
102 |
|
103 |
|
104 |
+
def signal_type_supports_dtype(input_type: SignalInputType, dtype: DataType) -> bool:
|
105 |
"""Returns True if the signal compute type supports the dtype."""
|
106 |
+
return dtype in SIGNAL_TYPE_TO_VALID_DTYPES[input_type]
|
107 |
|
108 |
|
109 |
Bin = tuple[str, Optional[Union[float, int]], Optional[Union[float, int]]]
|
lilac/server.py
CHANGED
@@ -117,7 +117,7 @@ def startup() -> None:
|
|
117 |
|
118 |
if repo_id:
|
119 |
# Copy datasets.
|
120 |
-
spaces_data_dir =
|
121 |
datasets = list_datasets(spaces_data_dir)
|
122 |
for dataset in datasets:
|
123 |
spaces_dataset_output_dir = get_dataset_output_dir(spaces_data_dir, dataset.namespace,
|
|
|
117 |
|
118 |
if repo_id:
|
119 |
# Copy datasets.
|
120 |
+
spaces_data_dir = 'data'
|
121 |
datasets = list_datasets(spaces_data_dir)
|
122 |
for dataset in datasets:
|
123 |
spaces_dataset_output_dir = get_dataset_output_dir(spaces_data_dir, dataset.namespace,
|
lilac/signals/concept_scorer.py
CHANGED
@@ -7,14 +7,17 @@ from typing_extensions import override
|
|
7 |
from ..auth import UserInfo
|
8 |
from ..concepts.concept import DEFAULT_NUM_NEG_EXAMPLES, DRAFT_MAIN, ConceptColumnInfo, ConceptModel
|
9 |
from ..concepts.db_concept import DISK_CONCEPT_MODEL_DB, ConceptModelDB
|
10 |
-
from ..
|
11 |
-
from ..
|
12 |
-
from
|
|
|
13 |
|
14 |
|
15 |
-
class ConceptScoreSignal(
|
16 |
"""Compute scores along a given concept for documents."""
|
17 |
name = 'concept_score'
|
|
|
|
|
18 |
display_name = 'Concept'
|
19 |
|
20 |
namespace: str
|
@@ -33,10 +36,16 @@ class ConceptScoreSignal(TextEmbeddingModelSignal):
|
|
33 |
|
34 |
@override
|
35 |
def fields(self) -> Field:
|
36 |
-
return field(
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
def set_column_info(self, column_info: ConceptColumnInfo) -> None:
|
42 |
"""Set the dataset info for this signal."""
|
@@ -63,26 +72,34 @@ class ConceptScoreSignal(TextEmbeddingModelSignal):
|
|
63 |
return concept_model.score(self.draft, data)
|
64 |
|
65 |
@override
|
66 |
-
def vector_compute(self, keys: Iterable[
|
67 |
-
|
68 |
concept_model = self._get_concept_model()
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
@override
|
73 |
def vector_compute_topk(
|
74 |
self,
|
75 |
topk: int,
|
76 |
-
|
77 |
-
keys: Optional[Iterable[
|
78 |
concept_model = self._get_concept_model()
|
79 |
query: np.ndarray = concept_model.coef(self.draft)
|
80 |
-
topk_keys = [key for key, _ in
|
81 |
-
return list(zip(topk_keys, self.vector_compute(topk_keys,
|
82 |
|
83 |
@override
|
84 |
def key(self, is_computed_signal: Optional[bool] = False) -> str:
|
85 |
# NOTE: The embedding is a value so already exists in the path structure. This means we do not
|
86 |
# need to provide the name as part of the key, which still guarantees uniqueness.
|
87 |
version = f'/v{self._get_concept_model().version}' if is_computed_signal else ''
|
88 |
-
return f'{self.namespace}/{self.concept_name}{version}'
|
|
|
7 |
from ..auth import UserInfo
|
8 |
from ..concepts.concept import DEFAULT_NUM_NEG_EXAMPLES, DRAFT_MAIN, ConceptColumnInfo, ConceptModel
|
9 |
from ..concepts.db_concept import DISK_CONCEPT_MODEL_DB, ConceptModelDB
|
10 |
+
from ..data.dataset_utils import lilac_span
|
11 |
+
from ..embeddings.vector_store import VectorDBIndex
|
12 |
+
from ..schema import Field, Item, PathKey, RichData, SignalInputType, field
|
13 |
+
from ..signals.signal import VectorSignal
|
14 |
|
15 |
|
16 |
+
class ConceptScoreSignal(VectorSignal):
|
17 |
"""Compute scores along a given concept for documents."""
|
18 |
name = 'concept_score'
|
19 |
+
input_type = SignalInputType.TEXT
|
20 |
+
|
21 |
display_name = 'Concept'
|
22 |
|
23 |
namespace: str
|
|
|
36 |
|
37 |
@override
|
38 |
def fields(self) -> Field:
|
39 |
+
return field(fields=[
|
40 |
+
field(
|
41 |
+
dtype='string_span',
|
42 |
+
fields={
|
43 |
+
'score': field(
|
44 |
+
'float32',
|
45 |
+
bins=[('Not in concept', None, 0.5), ('In concept', 0.5, None)],
|
46 |
+
)
|
47 |
+
})
|
48 |
+
])
|
49 |
|
50 |
def set_column_info(self, column_info: ConceptColumnInfo) -> None:
|
51 |
"""Set the dataset info for this signal."""
|
|
|
72 |
return concept_model.score(self.draft, data)
|
73 |
|
74 |
@override
|
75 |
+
def vector_compute(self, keys: Iterable[PathKey],
|
76 |
+
vector_index: VectorDBIndex) -> Iterable[Optional[Item]]:
|
77 |
concept_model = self._get_concept_model()
|
78 |
+
all_vector_spans = vector_index.get(keys)
|
79 |
+
# TODO(smilkov): Do this with batched computation.
|
80 |
+
for vector_spans in all_vector_spans:
|
81 |
+
embeddings = np.array([vector_span['vector'] for vector_span in vector_spans])
|
82 |
+
scores = concept_model.score_embeddings(self.draft, embeddings)
|
83 |
+
res: Item = []
|
84 |
+
for vector_span, score in zip(vector_spans, scores):
|
85 |
+
start, end = vector_span['span']
|
86 |
+
res.append(lilac_span(start, end, {'score': score}))
|
87 |
+
yield res
|
88 |
|
89 |
@override
|
90 |
def vector_compute_topk(
|
91 |
self,
|
92 |
topk: int,
|
93 |
+
vector_index: VectorDBIndex,
|
94 |
+
keys: Optional[Iterable[PathKey]] = None) -> list[tuple[PathKey, Optional[Item]]]:
|
95 |
concept_model = self._get_concept_model()
|
96 |
query: np.ndarray = concept_model.coef(self.draft)
|
97 |
+
topk_keys = [key for key, _ in vector_index.topk(query, topk, keys)]
|
98 |
+
return list(zip(topk_keys, self.vector_compute(topk_keys, vector_index)))
|
99 |
|
100 |
@override
|
101 |
def key(self, is_computed_signal: Optional[bool] = False) -> str:
|
102 |
# NOTE: The embedding is a value so already exists in the path structure. This means we do not
|
103 |
# need to provide the name as part of the key, which still guarantees uniqueness.
|
104 |
version = f'/v{self._get_concept_model().version}' if is_computed_signal else ''
|
105 |
+
return f'{self.namespace}/{self.concept_name}/{self.embedding}{version}'
|
lilac/signals/lang_detection.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
"""Language detection of a document."""
|
2 |
import re
|
3 |
-
from typing import
|
4 |
|
5 |
from pydantic import Field as PydanticField
|
6 |
from typing_extensions import override
|
@@ -10,9 +10,7 @@ from ..schema import Field, Item, RichData, SignalInputType, field
|
|
10 |
from .signal import TextSignal
|
11 |
|
12 |
LANG_CODE = 'lang_code'
|
13 |
-
|
14 |
-
if TYPE_CHECKING:
|
15 |
-
import langdetect
|
16 |
|
17 |
|
18 |
class LangDetectionSignal(TextSignal):
|
@@ -27,12 +25,18 @@ class LangDetectionSignal(TextSignal):
|
|
27 |
display_name = 'Language detection'
|
28 |
|
29 |
input_type = SignalInputType.TEXT
|
30 |
-
compute_type = SignalInputType.TEXT
|
31 |
|
32 |
split_by_paragraph: bool = PydanticField(
|
33 |
default=False, description='Compute language scores for each paragraph.')
|
34 |
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
@override
|
38 |
def setup(self) -> None:
|
@@ -42,7 +46,6 @@ class LangDetectionSignal(TextSignal):
|
|
42 |
except ImportError:
|
43 |
raise ImportError('Could not import the "langdetect" python package. '
|
44 |
'Please install it with `pip install langdetect`.')
|
45 |
-
self._model = langdetect.detect
|
46 |
|
47 |
@override
|
48 |
def fields(self) -> Field:
|
@@ -52,9 +55,6 @@ class LangDetectionSignal(TextSignal):
|
|
52 |
|
53 |
@override
|
54 |
def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
|
55 |
-
if not self._model:
|
56 |
-
raise RuntimeError('Language detection model is not initialized.')
|
57 |
-
|
58 |
import langdetect
|
59 |
data = cast(Iterable[str], data)
|
60 |
# Split on paragraphs.
|
@@ -62,10 +62,7 @@ class LangDetectionSignal(TextSignal):
|
|
62 |
|
63 |
for text in data:
|
64 |
if not self.split_by_paragraph:
|
65 |
-
|
66 |
-
yield self._model(text)
|
67 |
-
except langdetect.LangDetectException:
|
68 |
-
yield None
|
69 |
continue
|
70 |
|
71 |
prev_end = 0
|
@@ -75,20 +72,16 @@ class LangDetectionSignal(TextSignal):
|
|
75 |
text_span = text[prev_end:start]
|
76 |
text_span = text_span.strip()
|
77 |
if text_span:
|
78 |
-
|
79 |
-
|
80 |
result.append(lilac_span(prev_end, start, {LANG_CODE: lang_code}))
|
81 |
-
except langdetect.LangDetectException:
|
82 |
-
pass
|
83 |
prev_end = end
|
84 |
|
85 |
# Process the last chunk.
|
86 |
text_span = text[prev_end:]
|
87 |
if text_span.strip():
|
88 |
-
|
89 |
-
|
90 |
result.append(lilac_span(prev_end, len(text), {LANG_CODE: lang_code}))
|
91 |
-
except langdetect.LangDetectException:
|
92 |
-
pass
|
93 |
|
94 |
yield result
|
|
|
1 |
"""Language detection of a document."""
|
2 |
import re
|
3 |
+
from typing import Any, Iterable, Optional, cast
|
4 |
|
5 |
from pydantic import Field as PydanticField
|
6 |
from typing_extensions import override
|
|
|
10 |
from .signal import TextSignal
|
11 |
|
12 |
LANG_CODE = 'lang_code'
|
13 |
+
TEXT_LEN_THRESHOLD = 25
|
|
|
|
|
14 |
|
15 |
|
16 |
class LangDetectionSignal(TextSignal):
|
|
|
25 |
display_name = 'Language detection'
|
26 |
|
27 |
input_type = SignalInputType.TEXT
|
|
|
28 |
|
29 |
split_by_paragraph: bool = PydanticField(
|
30 |
default=False, description='Compute language scores for each paragraph.')
|
31 |
|
32 |
+
def _detect(self, text: str, langdetect: Any) -> Optional[str]:
|
33 |
+
|
34 |
+
if len(text) < TEXT_LEN_THRESHOLD:
|
35 |
+
return 'TOO_SHORT'
|
36 |
+
try:
|
37 |
+
return langdetect.detect(text)
|
38 |
+
except langdetect.LangDetectException:
|
39 |
+
return None
|
40 |
|
41 |
@override
|
42 |
def setup(self) -> None:
|
|
|
46 |
except ImportError:
|
47 |
raise ImportError('Could not import the "langdetect" python package. '
|
48 |
'Please install it with `pip install langdetect`.')
|
|
|
49 |
|
50 |
@override
|
51 |
def fields(self) -> Field:
|
|
|
55 |
|
56 |
@override
|
57 |
def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
|
|
|
|
|
|
|
58 |
import langdetect
|
59 |
data = cast(Iterable[str], data)
|
60 |
# Split on paragraphs.
|
|
|
62 |
|
63 |
for text in data:
|
64 |
if not self.split_by_paragraph:
|
65 |
+
yield self._detect(text, langdetect)
|
|
|
|
|
|
|
66 |
continue
|
67 |
|
68 |
prev_end = 0
|
|
|
72 |
text_span = text[prev_end:start]
|
73 |
text_span = text_span.strip()
|
74 |
if text_span:
|
75 |
+
lang_code = self._detect(text_span, langdetect)
|
76 |
+
if lang_code:
|
77 |
result.append(lilac_span(prev_end, start, {LANG_CODE: lang_code}))
|
|
|
|
|
78 |
prev_end = end
|
79 |
|
80 |
# Process the last chunk.
|
81 |
text_span = text[prev_end:]
|
82 |
if text_span.strip():
|
83 |
+
lang_code = self._detect(text_span, langdetect)
|
84 |
+
if lang_code:
|
85 |
result.append(lilac_span(prev_end, len(text), {LANG_CODE: lang_code}))
|
|
|
|
|
86 |
|
87 |
yield result
|
lilac/signals/minhash_dup.py
CHANGED
@@ -17,7 +17,7 @@ from scipy.integrate import quad as integrate
|
|
17 |
from tqdm import tqdm
|
18 |
|
19 |
SEED = 42
|
20 |
-
|
21 |
RNG = np.random.RandomState(SEED)
|
22 |
MAX_HASH = np.uint64((1 << 32) - 1)
|
23 |
MERSENNE_PRIME = np.uint64((1 << 61) - 1)
|
@@ -72,7 +72,7 @@ def _embed_func(
|
|
72 |
The hash values in each range and the index.
|
73 |
"""
|
74 |
hashvalues = np.ones(num_perm, dtype=np.uint64) * MAX_HASH
|
75 |
-
tokens = {' '.join(t) for t in _ngrams(
|
76 |
hv = np.array([_sha1_hash32(token.encode('utf-8')) for token in tokens],
|
77 |
dtype=np.uint64) # noqa: E501
|
78 |
a, b = permutations
|
|
|
17 |
from tqdm import tqdm
|
18 |
|
19 |
SEED = 42
|
20 |
+
WHITESPACE = re.compile(r'\s+')
|
21 |
RNG = np.random.RandomState(SEED)
|
22 |
MAX_HASH = np.uint64((1 << 32) - 1)
|
23 |
MERSENNE_PRIME = np.uint64((1 << 61) - 1)
|
|
|
72 |
The hash values in each range and the index.
|
73 |
"""
|
74 |
hashvalues = np.ones(num_perm, dtype=np.uint64) * MAX_HASH
|
75 |
+
tokens = {' '.join(t) for t in _ngrams(WHITESPACE.split(content), ngram_size, min_ngram_size)}
|
76 |
hv = np.array([_sha1_hash32(token.encode('utf-8')) for token in tokens],
|
77 |
dtype=np.uint64) # noqa: E501
|
78 |
a, b = permutations
|
lilac/signals/near_dup.py
CHANGED
@@ -24,7 +24,6 @@ class NearDuplicateSignal(TextSignal):
|
|
24 |
display_name = 'Near duplicate documents'
|
25 |
|
26 |
input_type = SignalInputType.TEXT
|
27 |
-
compute_type = SignalInputType.TEXT
|
28 |
|
29 |
threshold: float = PydanticField(
|
30 |
default=0.75,
|
|
|
24 |
display_name = 'Near duplicate documents'
|
25 |
|
26 |
input_type = SignalInputType.TEXT
|
|
|
27 |
|
28 |
threshold: float = PydanticField(
|
29 |
default=0.75,
|
lilac/signals/ner.py
CHANGED
@@ -23,7 +23,6 @@ class SpacyNER(TextSignal):
|
|
23 |
model: str = PydanticField(title='SpaCy package name or model path.', default='en_core_web_sm')
|
24 |
|
25 |
input_type = SignalInputType.TEXT
|
26 |
-
compute_type = SignalInputType.TEXT
|
27 |
|
28 |
_nlp: Optional['spacy.language.Language'] = None
|
29 |
|
|
|
23 |
model: str = PydanticField(title='SpaCy package name or model path.', default='en_core_web_sm')
|
24 |
|
25 |
input_type = SignalInputType.TEXT
|
|
|
26 |
|
27 |
_nlp: Optional['spacy.language.Language'] = None
|
28 |
|
lilac/signals/pii.py
CHANGED
@@ -25,7 +25,6 @@ class PIISignal(TextSignal):
|
|
25 |
display_name = 'Personal Information (PII)'
|
26 |
|
27 |
input_type = SignalInputType.TEXT
|
28 |
-
compute_type = SignalInputType.TEXT
|
29 |
|
30 |
@override
|
31 |
def fields(self) -> Field:
|
|
|
25 |
display_name = 'Personal Information (PII)'
|
26 |
|
27 |
input_type = SignalInputType.TEXT
|
|
|
28 |
|
29 |
@override
|
30 |
def fields(self) -> Field:
|
lilac/signals/semantic_similarity.py
CHANGED
@@ -5,13 +5,14 @@ import numpy as np
|
|
5 |
from scipy.interpolate import interp1d
|
6 |
from typing_extensions import override
|
7 |
|
|
|
8 |
from ..embeddings.embedding import EmbedFn, get_embed_fn
|
9 |
-
from ..embeddings.vector_store import
|
10 |
-
from ..schema import Field, Item, RichData,
|
11 |
-
from .signal import
|
12 |
|
13 |
|
14 |
-
class SemanticSimilaritySignal(
|
15 |
"""Compute semantic similarity for a query and a document.
|
16 |
|
17 |
\
|
@@ -20,6 +21,7 @@ class SemanticSimilaritySignal(TextEmbeddingModelSignal):
|
|
20 |
"""
|
21 |
name = 'semantic_similarity'
|
22 |
display_name = 'Semantic Similarity'
|
|
|
23 |
|
24 |
query: str
|
25 |
|
@@ -32,15 +34,13 @@ class SemanticSimilaritySignal(TextEmbeddingModelSignal):
|
|
32 |
def __init__(self, query: Union[str, bytes], embedding: str, **kwargs: Any):
|
33 |
if isinstance(query, bytes):
|
34 |
raise ValueError('Image queries are not yet supported for SemanticSimilarity.')
|
35 |
-
|
36 |
-
super().__init__(query=query, embedding=embedding, **kwargs)
|
37 |
-
|
38 |
# TODO(nsthorat): The embedding cls might have arguments. This needs to be resolved.
|
39 |
self._embed_fn = get_embed_fn(embedding)
|
40 |
|
41 |
@override
|
42 |
def fields(self) -> Field:
|
43 |
-
return field('float32')
|
44 |
|
45 |
def _get_search_embedding(self) -> np.ndarray:
|
46 |
"""Return the embedding for the search text."""
|
@@ -51,26 +51,32 @@ class SemanticSimilaritySignal(TextEmbeddingModelSignal):
|
|
51 |
|
52 |
@override
|
53 |
def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
57 |
|
58 |
@override
|
59 |
-
def vector_compute(self, keys: Iterable[
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
#
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
@override
|
69 |
def vector_compute_topk(
|
70 |
self,
|
71 |
topk: int,
|
72 |
-
|
73 |
-
keys: Optional[Iterable[
|
74 |
query = self._get_search_embedding()
|
75 |
-
topk_keys = [key for key, _ in
|
76 |
-
return list(zip(topk_keys, self.vector_compute(topk_keys,
|
|
|
5 |
from scipy.interpolate import interp1d
|
6 |
from typing_extensions import override
|
7 |
|
8 |
+
from ..data.dataset_utils import lilac_span
|
9 |
from ..embeddings.embedding import EmbedFn, get_embed_fn
|
10 |
+
from ..embeddings.vector_store import VectorDBIndex
|
11 |
+
from ..schema import Field, Item, PathKey, RichData, SignalInputType, field
|
12 |
+
from .signal import VectorSignal
|
13 |
|
14 |
|
15 |
+
class SemanticSimilaritySignal(VectorSignal):
|
16 |
"""Compute semantic similarity for a query and a document.
|
17 |
|
18 |
\
|
|
|
21 |
"""
|
22 |
name = 'semantic_similarity'
|
23 |
display_name = 'Semantic Similarity'
|
24 |
+
input_type = SignalInputType.TEXT
|
25 |
|
26 |
query: str
|
27 |
|
|
|
34 |
def __init__(self, query: Union[str, bytes], embedding: str, **kwargs: Any):
|
35 |
if isinstance(query, bytes):
|
36 |
raise ValueError('Image queries are not yet supported for SemanticSimilarity.')
|
37 |
+
super().__init__(query=query, embedding=embedding, **kwargs) # type: ignore
|
|
|
|
|
38 |
# TODO(nsthorat): The embedding cls might have arguments. This needs to be resolved.
|
39 |
self._embed_fn = get_embed_fn(embedding)
|
40 |
|
41 |
@override
|
42 |
def fields(self) -> Field:
|
43 |
+
return field(fields=[field(dtype='string_span', fields={'score': 'float32'})])
|
44 |
|
45 |
def _get_search_embedding(self) -> np.ndarray:
|
46 |
"""Return the embedding for the search text."""
|
|
|
51 |
|
52 |
@override
|
53 |
def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
|
54 |
+
embeddings = self._embed_fn(data)
|
55 |
+
scores = embeddings.dot(self._get_search_embedding()).reshape(-1)
|
56 |
+
for text, score in zip(data, scores):
|
57 |
+
yield [lilac_span(0, len(text), {'score': score})]
|
58 |
|
59 |
@override
|
60 |
+
def vector_compute(self, keys: Iterable[PathKey],
|
61 |
+
vector_index: VectorDBIndex) -> Iterable[Optional[Item]]:
|
62 |
+
all_vector_spans = vector_index.get(keys)
|
63 |
+
query = self._get_search_embedding()
|
64 |
+
# TODO(smilkov): Do this with batched computation.
|
65 |
+
for vector_spans in all_vector_spans:
|
66 |
+
embeddings = np.array([vector_span['vector'] for vector_span in vector_spans])
|
67 |
+
scores = embeddings.dot(query).reshape(-1)
|
68 |
+
res: Item = []
|
69 |
+
for vector_span, score in zip(vector_spans, scores):
|
70 |
+
start, end = vector_span['span']
|
71 |
+
res.append(lilac_span(start, end, {'score': score}))
|
72 |
+
yield res
|
73 |
|
74 |
@override
|
75 |
def vector_compute_topk(
|
76 |
self,
|
77 |
topk: int,
|
78 |
+
vector_index: VectorDBIndex,
|
79 |
+
keys: Optional[Iterable[PathKey]] = None) -> list[tuple[PathKey, Optional[Item]]]:
|
80 |
query = self._get_search_embedding()
|
81 |
+
topk_keys = [key for key, _ in vector_index.topk(query, topk, keys)]
|
82 |
+
return list(zip(topk_keys, self.vector_compute(topk_keys, vector_index)))
|
lilac/signals/signal.py
CHANGED
@@ -6,8 +6,8 @@ from typing import Any, ClassVar, Iterable, Optional, Sequence, Type, TypeVar, U
|
|
6 |
from pydantic import BaseModel, Extra, validator
|
7 |
from typing_extensions import override
|
8 |
|
9 |
-
from ..embeddings.vector_store import
|
10 |
-
from ..schema import Field, Item, RichData, SignalInputType,
|
11 |
|
12 |
EMBEDDING_KEY = 'embedding'
|
13 |
|
@@ -19,12 +19,8 @@ class Signal(abc.ABC, BaseModel):
|
|
19 |
# The display name is just used for rendering in the UI.
|
20 |
display_name: ClassVar[Optional[str]]
|
21 |
|
22 |
-
# The input type is used to populate the UI
|
23 |
-
# if a signal is an TextEmbeddingModelSignal, it computes over embeddings, but it's input type
|
24 |
-
# will be text.
|
25 |
input_type: ClassVar[SignalInputType]
|
26 |
-
# The compute type defines what should be passed to compute().
|
27 |
-
compute_type: ClassVar[SignalInputType]
|
28 |
|
29 |
# The signal_name will get populated in init automatically from the class name so it gets
|
30 |
# serialized and the signal author doesn't have to define both the static property and the field.
|
@@ -79,40 +75,6 @@ class Signal(abc.ABC, BaseModel):
|
|
79 |
"""
|
80 |
raise NotImplementedError
|
81 |
|
82 |
-
def vector_compute(self, keys: Iterable[VectorKey],
|
83 |
-
vector_store: VectorStore) -> Iterable[Optional[Item]]:
|
84 |
-
"""Compute the signal for an iterable of keys that point to documents or images.
|
85 |
-
|
86 |
-
Args:
|
87 |
-
keys: An iterable of value ids (at row-level or lower) to lookup precomputed embeddings.
|
88 |
-
vector_store: The vector store to lookup pre-computed embeddings.
|
89 |
-
|
90 |
-
Returns
|
91 |
-
An iterable of items. Sparse signals should return "None" for skipped inputs.
|
92 |
-
"""
|
93 |
-
raise NotImplementedError
|
94 |
-
|
95 |
-
def vector_compute_topk(
|
96 |
-
self,
|
97 |
-
topk: int,
|
98 |
-
vector_store: VectorStore,
|
99 |
-
keys: Optional[Iterable[VectorKey]] = None) -> Sequence[tuple[VectorKey, Optional[Item]]]:
|
100 |
-
"""Return signal results only for the top k documents or images.
|
101 |
-
|
102 |
-
Signals decide how to rank each document/image in the dataset, usually by a similarity score
|
103 |
-
obtained via the vector store.
|
104 |
-
|
105 |
-
Args:
|
106 |
-
topk: The number of items to return, ranked by the signal.
|
107 |
-
vector_store: The vector store to lookup pre-computed embeddings.
|
108 |
-
keys: Optional iterable of row ids to restrict the search to.
|
109 |
-
|
110 |
-
Returns
|
111 |
-
A list of (key, signal_output) tuples containing the `topk` items. Sparse signals should
|
112 |
-
return "None" for skipped inputs.
|
113 |
-
"""
|
114 |
-
raise NotImplementedError
|
115 |
-
|
116 |
def key(self, is_computed_signal: Optional[bool] = False) -> str:
|
117 |
"""Get the key for a signal.
|
118 |
|
@@ -155,7 +117,6 @@ def _args_key_from_dict(args_dict: dict[str, Any]) -> str:
|
|
155 |
class TextSplitterSignal(Signal):
|
156 |
"""An interface for signals that compute over text."""
|
157 |
input_type = SignalInputType.TEXT
|
158 |
-
compute_type = SignalInputType.TEXT
|
159 |
|
160 |
@override
|
161 |
def fields(self) -> Field:
|
@@ -166,7 +127,6 @@ class TextSplitterSignal(Signal):
|
|
166 |
class TextSignal(Signal):
|
167 |
"""An interface for signals that compute over text."""
|
168 |
input_type = SignalInputType.TEXT
|
169 |
-
compute_type = SignalInputType.TEXT
|
170 |
|
171 |
@override
|
172 |
def key(self, is_computed_signal: Optional[bool] = False) -> str:
|
@@ -179,7 +139,6 @@ class TextSignal(Signal):
|
|
179 |
class TextEmbeddingSignal(TextSignal):
|
180 |
"""An interface for signals that compute embeddings for text."""
|
181 |
input_type = SignalInputType.TEXT
|
182 |
-
compute_type = SignalInputType.TEXT
|
183 |
|
184 |
_split = True
|
185 |
|
@@ -196,38 +155,43 @@ class TextEmbeddingSignal(TextSignal):
|
|
196 |
return field(fields=[field('string_span', fields={EMBEDDING_KEY: 'embedding'})])
|
197 |
|
198 |
|
199 |
-
class
|
200 |
-
"""An interface for signals that
|
201 |
-
input_type = SignalInputType.TEXT
|
202 |
-
# compute() takes embeddings, while it operates over text fields by transitively computing splits
|
203 |
-
# and embeddings.
|
204 |
-
compute_type = SignalInputType.TEXT_EMBEDDING
|
205 |
-
|
206 |
embedding: str
|
207 |
-
_embedding_signal: Optional[TextEmbeddingSignal] = None
|
208 |
|
209 |
-
def
|
210 |
-
|
|
|
211 |
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
|
|
|
|
225 |
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
232 |
|
233 |
Tsignal = TypeVar('Tsignal', bound=Signal)
|
|
|
6 |
from pydantic import BaseModel, Extra, validator
|
7 |
from typing_extensions import override
|
8 |
|
9 |
+
from ..embeddings.vector_store import VectorDBIndex
|
10 |
+
from ..schema import Field, Item, PathKey, RichData, SignalInputType, field
|
11 |
|
12 |
EMBEDDING_KEY = 'embedding'
|
13 |
|
|
|
19 |
# The display name is just used for rendering in the UI.
|
20 |
display_name: ClassVar[Optional[str]]
|
21 |
|
22 |
+
# The input type is used to populate the UI to determine what the signal accepts as input.
|
|
|
|
|
23 |
input_type: ClassVar[SignalInputType]
|
|
|
|
|
24 |
|
25 |
# The signal_name will get populated in init automatically from the class name so it gets
|
26 |
# serialized and the signal author doesn't have to define both the static property and the field.
|
|
|
75 |
"""
|
76 |
raise NotImplementedError
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
def key(self, is_computed_signal: Optional[bool] = False) -> str:
|
79 |
"""Get the key for a signal.
|
80 |
|
|
|
117 |
class TextSplitterSignal(Signal):
|
118 |
"""An interface for signals that compute over text."""
|
119 |
input_type = SignalInputType.TEXT
|
|
|
120 |
|
121 |
@override
|
122 |
def fields(self) -> Field:
|
|
|
127 |
class TextSignal(Signal):
|
128 |
"""An interface for signals that compute over text."""
|
129 |
input_type = SignalInputType.TEXT
|
|
|
130 |
|
131 |
@override
|
132 |
def key(self, is_computed_signal: Optional[bool] = False) -> str:
|
|
|
139 |
class TextEmbeddingSignal(TextSignal):
|
140 |
"""An interface for signals that compute embeddings for text."""
|
141 |
input_type = SignalInputType.TEXT
|
|
|
142 |
|
143 |
_split = True
|
144 |
|
|
|
155 |
return field(fields=[field('string_span', fields={EMBEDDING_KEY: 'embedding'})])
|
156 |
|
157 |
|
158 |
+
class VectorSignal(Signal, abc.ABC):
|
159 |
+
"""An interface for signals that can compute items given vector inputs."""
|
|
|
|
|
|
|
|
|
|
|
160 |
embedding: str
|
|
|
161 |
|
162 |
+
def vector_compute(self, keys: Iterable[PathKey],
|
163 |
+
vector_index: VectorDBIndex) -> Iterable[Optional[Item]]:
|
164 |
+
"""Compute the signal for an iterable of keys that point to documents or images.
|
165 |
|
166 |
+
Args:
|
167 |
+
keys: An iterable of value ids (at row-level or lower) to lookup precomputed embeddings.
|
168 |
+
vector_index: The vector index to lookup pre-computed embeddings.
|
169 |
|
170 |
+
Returns
|
171 |
+
An iterable of items. Sparse signals should return "None" for skipped inputs.
|
172 |
+
"""
|
173 |
+
raise NotImplementedError
|
174 |
|
175 |
+
def vector_compute_topk(
|
176 |
+
self,
|
177 |
+
topk: int,
|
178 |
+
vector_index: VectorDBIndex,
|
179 |
+
keys: Optional[Iterable[PathKey]] = None) -> Sequence[tuple[PathKey, Optional[Item]]]:
|
180 |
+
"""Return signal results only for the top k documents or images.
|
181 |
|
182 |
+
Signals decide how to rank each document/image in the dataset, usually by a similarity score
|
183 |
+
obtained via the vector store.
|
184 |
+
|
185 |
+
Args:
|
186 |
+
topk: The number of items to return, ranked by the signal.
|
187 |
+
vector_index: The vector index to lookup pre-computed embeddings.
|
188 |
+
keys: Optional iterable of row ids to restrict the search to.
|
189 |
+
|
190 |
+
Returns
|
191 |
+
A list of (key, signal_output) tuples containing the `topk` items. Sparse signals should
|
192 |
+
return "None" for skipped inputs.
|
193 |
+
"""
|
194 |
+
raise NotImplementedError
|
195 |
|
196 |
|
197 |
Tsignal = TypeVar('Tsignal', bound=Signal)
|
lilac/signals/splitters/chunk_splitter.py
CHANGED
@@ -41,7 +41,7 @@ from ..signal import TextSplitterSignal
|
|
41 |
|
42 |
TextChunk = tuple[str, tuple[int, int]]
|
43 |
|
44 |
-
DEFAULT_SEPARATORS = ['\n\n', '\n', ' ', '']
|
45 |
CHUNK_SIZE = 400
|
46 |
CHUNK_OVERLAP = 50
|
47 |
|
@@ -99,10 +99,24 @@ def _sep_split(text: str, separator: str) -> list[TextChunk]:
|
|
99 |
|
100 |
offset = 0
|
101 |
chunks: list[TextChunk] = []
|
|
|
102 |
end_index = text.find(separator, offset)
|
103 |
|
104 |
while end_index >= 0:
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
offset = end_index + len(separator)
|
107 |
end_index = text.find(separator, offset)
|
108 |
|
@@ -120,9 +134,8 @@ def split_text(text: str,
|
|
120 |
"""Split incoming text and return chunks."""
|
121 |
|
122 |
def _merge_splits(splits: Iterable[TextChunk], separator: str) -> list[TextChunk]:
|
123 |
-
# We now want to combine these smaller pieces into medium size
|
124 |
-
|
125 |
-
separator_len = length_function(separator)
|
126 |
|
127 |
docs: list[TextChunk] = []
|
128 |
current_doc: list[TextChunk] = []
|
@@ -157,12 +170,14 @@ def split_text(text: str,
|
|
157 |
final_chunks: list[TextChunk] = []
|
158 |
# Get appropriate separator to use
|
159 |
separator = separators[-1]
|
160 |
-
|
|
|
161 |
if _s == '':
|
162 |
separator = _s
|
163 |
break
|
164 |
if _s in text:
|
165 |
separator = _s
|
|
|
166 |
break
|
167 |
# Now that we have the separator, split the text.
|
168 |
splits = _sep_split(text, separator)
|
@@ -177,7 +192,8 @@ def split_text(text: str,
|
|
177 |
merged_text = _merge_splits(good_splits, separator)
|
178 |
final_chunks.extend(merged_text)
|
179 |
good_splits = []
|
180 |
-
other_chunks = split_text(text_chunk, chunk_size, chunk_overlap,
|
|
|
181 |
# Adjust the offsets of the other chunks.
|
182 |
other_chunks = [(t, (s + start, e + start)) for t, (s, e) in other_chunks]
|
183 |
final_chunks.extend(other_chunks)
|
@@ -188,6 +204,9 @@ def split_text(text: str,
|
|
188 |
|
189 |
|
190 |
def _join_chunks(chunks: list[TextChunk], separator: str) -> Optional[TextChunk]:
|
|
|
|
|
|
|
191 |
text = separator.join([text for text, _ in chunks])
|
192 |
text = text.strip()
|
193 |
if text == '':
|
|
|
41 |
|
42 |
TextChunk = tuple[str, tuple[int, int]]
|
43 |
|
44 |
+
DEFAULT_SEPARATORS = ['```', '\n\n', '\n', ' ', '']
|
45 |
CHUNK_SIZE = 400
|
46 |
CHUNK_OVERLAP = 50
|
47 |
|
|
|
99 |
|
100 |
offset = 0
|
101 |
chunks: list[TextChunk] = []
|
102 |
+
open_code_block = False
|
103 |
end_index = text.find(separator, offset)
|
104 |
|
105 |
while end_index >= 0:
|
106 |
+
if separator == '```':
|
107 |
+
# We want to keep the code block seperators as part of the text chunk.
|
108 |
+
start = max(0, offset - len(separator))
|
109 |
+
if open_code_block:
|
110 |
+
end = end_index + len(separator)
|
111 |
+
open_code_block = False
|
112 |
+
else:
|
113 |
+
end = end_index
|
114 |
+
open_code_block = True
|
115 |
+
else:
|
116 |
+
start = offset
|
117 |
+
end = end_index
|
118 |
+
|
119 |
+
chunks.append((text[start:end], (start, end)))
|
120 |
offset = end_index + len(separator)
|
121 |
end_index = text.find(separator, offset)
|
122 |
|
|
|
134 |
"""Split incoming text and return chunks."""
|
135 |
|
136 |
def _merge_splits(splits: Iterable[TextChunk], separator: str) -> list[TextChunk]:
|
137 |
+
# We now want to combine these smaller pieces into medium size chunks to send to the LLM.
|
138 |
+
separator_len = 0 if separator == '```' else length_function(separator)
|
|
|
139 |
|
140 |
docs: list[TextChunk] = []
|
141 |
current_doc: list[TextChunk] = []
|
|
|
170 |
final_chunks: list[TextChunk] = []
|
171 |
# Get appropriate separator to use
|
172 |
separator = separators[-1]
|
173 |
+
new_separators: list[str] = []
|
174 |
+
for i, _s in enumerate(separators):
|
175 |
if _s == '':
|
176 |
separator = _s
|
177 |
break
|
178 |
if _s in text:
|
179 |
separator = _s
|
180 |
+
new_separators = separators[i + 1:]
|
181 |
break
|
182 |
# Now that we have the separator, split the text.
|
183 |
splits = _sep_split(text, separator)
|
|
|
192 |
merged_text = _merge_splits(good_splits, separator)
|
193 |
final_chunks.extend(merged_text)
|
194 |
good_splits = []
|
195 |
+
other_chunks = split_text(text_chunk, chunk_size, chunk_overlap, new_separators,
|
196 |
+
length_function)
|
197 |
# Adjust the offsets of the other chunks.
|
198 |
other_chunks = [(t, (s + start, e + start)) for t, (s, e) in other_chunks]
|
199 |
final_chunks.extend(other_chunks)
|
|
|
204 |
|
205 |
|
206 |
def _join_chunks(chunks: list[TextChunk], separator: str) -> Optional[TextChunk]:
|
207 |
+
if separator == '```':
|
208 |
+
# Code blocks already have the separator.
|
209 |
+
separator = ''
|
210 |
text = separator.join([text for text, _ in chunks])
|
211 |
text = text.strip()
|
212 |
if text == '':
|
lilac/signals/substring_search.py
CHANGED
@@ -14,7 +14,6 @@ class SubstringSignal(Signal):
|
|
14 |
name = 'substring_search'
|
15 |
display_name = 'Substring Search'
|
16 |
input_type = SignalInputType.TEXT
|
17 |
-
compute_type = SignalInputType.TEXT
|
18 |
|
19 |
query: str
|
20 |
|
|
|
14 |
name = 'substring_search'
|
15 |
display_name = 'Substring Search'
|
16 |
input_type = SignalInputType.TEXT
|
|
|
17 |
|
18 |
query: str
|
19 |
|
lilac/signals/text_statistics.py
CHANGED
@@ -13,6 +13,7 @@ SPACY_BATCH_SIZE = 128
|
|
13 |
NUM_CHARS = 'num_characters'
|
14 |
READABILITY = 'readability'
|
15 |
TYPE_TOKEN_RATIO = 'log(type_token_ratio)'
|
|
|
16 |
|
17 |
if TYPE_CHECKING:
|
18 |
from spacy import Language
|
@@ -28,11 +29,14 @@ class TextStatisticsSignal(TextSignal):
|
|
28 |
|
29 |
@override
|
30 |
def fields(self) -> Field:
|
31 |
-
return field(
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
36 |
|
37 |
@override
|
38 |
def setup(self) -> None:
|
@@ -71,15 +75,27 @@ class TextStatisticsSignal(TextSignal):
|
|
71 |
# available statistics.
|
72 |
corpus = textacy.corpus.Corpus(lang=self._lang, data=batch)
|
73 |
for doc in cast(Iterable['Doc'], corpus):
|
74 |
-
if not
|
75 |
yield None
|
76 |
continue
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
yield {
|
82 |
NUM_CHARS: num_chars,
|
83 |
READABILITY: readability,
|
84 |
TYPE_TOKEN_RATIO: ttr,
|
|
|
85 |
}
|
|
|
13 |
NUM_CHARS = 'num_characters'
|
14 |
READABILITY = 'readability'
|
15 |
TYPE_TOKEN_RATIO = 'log(type_token_ratio)'
|
16 |
+
FRAC_NON_ASCII = 'frac_non_ascii'
|
17 |
|
18 |
if TYPE_CHECKING:
|
19 |
from spacy import Language
|
|
|
29 |
|
30 |
@override
|
31 |
def fields(self) -> Field:
|
32 |
+
return field(
|
33 |
+
fields={
|
34 |
+
NUM_CHARS: 'int32',
|
35 |
+
READABILITY: 'float32',
|
36 |
+
TYPE_TOKEN_RATIO: 'float32',
|
37 |
+
FRAC_NON_ASCII: field(
|
38 |
+
'float32', bins=[('Low', None, 0.15), ('Medium', 0.15, 0.3), ('High', 0.3, None)])
|
39 |
+
})
|
40 |
|
41 |
@override
|
42 |
def setup(self) -> None:
|
|
|
75 |
# available statistics.
|
76 |
corpus = textacy.corpus.Corpus(lang=self._lang, data=batch)
|
77 |
for doc in cast(Iterable['Doc'], corpus):
|
78 |
+
if not doc or not doc.text.strip():
|
79 |
yield None
|
80 |
continue
|
81 |
+
try:
|
82 |
+
readability = text_stats.readability.automated_readability_index(doc)
|
83 |
+
except ZeroDivisionError:
|
84 |
+
readability = None
|
85 |
+
try:
|
86 |
+
ttr = text_stats.diversity.log_ttr(doc)
|
87 |
+
except ValueError:
|
88 |
+
ttr = None
|
89 |
+
num_chars = len(doc.text)
|
90 |
+
num_non_ascii = 0
|
91 |
+
for c in doc.text:
|
92 |
+
if ord(c) >= 128:
|
93 |
+
num_non_ascii += 1
|
94 |
+
frac_non_ascii = num_non_ascii / num_chars if num_chars else 0
|
95 |
|
96 |
yield {
|
97 |
NUM_CHARS: num_chars,
|
98 |
READABILITY: readability,
|
99 |
TYPE_TOKEN_RATIO: ttr,
|
100 |
+
FRAC_NON_ASCII: frac_non_ascii
|
101 |
}
|
lilac/web/_app/immutable/assets/0.d7803630.css
ADDED
The diff for this file is too large to render.
See raw diff
|
|
lilac/web/_app/immutable/assets/ConceptView.98f1ad48.css
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
button.svelte-d3v0kx{width:100%;padding:.5rem 1rem;text-align:left;--tw-text-opacity:1;color:rgb(31 41 55 / var(--tw-text-opacity))}button.svelte-d3v0kx:hover{--tw-bg-opacity:1;background-color:rgb(229 231 235 / var(--tw-bg-opacity));--tw-text-opacity:1;color:rgb(0 0 0 / var(--tw-text-opacity))}button[data-active=true].svelte-d3v0kx{--tw-bg-opacity:1;background-color:rgb(209 213 219 / var(--tw-bg-opacity));--tw-text-opacity:1;color:rgb(0 0 0 / var(--tw-text-opacity))}.bx--tag{margin:0}.concept-score-pill .bx--tooltip__label{margin-right:.25rem;display:inline-block;height:100%;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;max-width:5rem}.concept-score-pill .bx--tooltip__content{display:flex;flex-direction:column;align-items:center}table.svelte-zc5pc5 td.svelte-zc5pc5{padding:.25rem .5rem}.embedding-badge-nolabel .bx--tooltip__label .bx--tooltip__trigger{margin:0}.embedding-badge-nolabel .bx--tag__custom-icon{margin-right:0}.more-button .bx--btn{height:1.5rem;width:12rem}.named-value-name.svelte-1689hje{max-width:15rem}.highlight-span.svelte-8ox5pu{padding-top:1.5px;padding-bottom:1.5px}.highlight-span pre{--tw-bg-opacity:1;background-color:rgb(226 232 240 / var(--tw-bg-opacity));font-size:.875rem;line-height:1.25rem}.highlight-span p,.highlight-span pre{margin-top:.75rem;margin-bottom:.75rem}.highlight-span p:first-child{display:inline!important}.highlight-span p:last-child{display:inline!important}.highlight-span p,.highlight-span h1{background-color:inherit}.highlight-span p{font-size:.875rem;line-height:1.25rem;font-weight:inherit}.dataset-link.bx--btn{min-height:0px}
|