nsthorat commited on
Commit
51b77d2
1 Parent(s): 8019093
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.demo +1 -1
  2. .gitattributes +0 -7
  3. data/concept/lilac/legal-termination/concept.json +0 -184
  4. data/concept/lilac/legal-termination/sbert.pkl +0 -0
  5. data/concept/lilac/negative-sentiment/concept.json +0 -634
  6. data/concept/lilac/negative-sentiment/sbert.pkl +0 -0
  7. data/concept/lilac/positive-sentiment/concept.json +0 -564
  8. data/concept/lilac/positive-sentiment/sbert.pkl +0 -0
  9. data/concept/lilac/profanity/concept.json +0 -0
  10. data/concept/lilac/profanity/openai.pkl +0 -3
  11. data/concept/lilac/profanity/sbert.pkl +0 -0
  12. data/concept/lilac/toxicity/concept.json +0 -0
  13. data/concept/lilac/toxicity/sbert.pkl +0 -0
  14. data/datasets/local/spotify/data-00000-of-00001.parquet +0 -3
  15. data/datasets/local/spotify/manifest.json +0 -27
  16. data/datasets/local/spotify/settings.json +0 -1
  17. data/datasets/local/spotify/text/.concepts/local/aliens/sbert-neg-100.pkl +0 -0
  18. data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet +0 -3
  19. data/datasets/local/spotify/text/lang_detection/signal_manifest.json +0 -36
  20. data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet +0 -3
  21. data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet +0 -3
  22. data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/signal_manifest.json +0 -64
  23. data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl +0 -3
  24. data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy +0 -3
  25. data/datasets/local/spotify/text/sbert/signal_manifest.json +0 -37
  26. lilac/concepts/concept.py +6 -7
  27. lilac/config.py +33 -30
  28. lilac/data/dataset.py +7 -7
  29. lilac/data/dataset_duckdb.py +107 -164
  30. lilac/data/dataset_test_utils.py +16 -10
  31. lilac/data/dataset_utils.py +29 -22
  32. lilac/embeddings/vector_store.py +85 -4
  33. lilac/embeddings/vector_store_numpy.py +7 -11
  34. lilac/router_concept.py +2 -2
  35. lilac/router_dataset.py +2 -9
  36. lilac/schema.py +4 -4
  37. lilac/server.py +1 -1
  38. lilac/signals/concept_scorer.py +34 -17
  39. lilac/signals/lang_detection.py +15 -22
  40. lilac/signals/minhash_dup.py +2 -2
  41. lilac/signals/near_dup.py +0 -1
  42. lilac/signals/ner.py +0 -1
  43. lilac/signals/pii.py +0 -1
  44. lilac/signals/semantic_similarity.py +29 -23
  45. lilac/signals/signal.py +34 -70
  46. lilac/signals/splitters/chunk_splitter.py +26 -7
  47. lilac/signals/substring_search.py +0 -1
  48. lilac/signals/text_statistics.py +25 -9
  49. lilac/web/_app/immutable/assets/0.d7803630.css +0 -0
  50. lilac/web/_app/immutable/assets/ConceptView.98f1ad48.css +1 -0
.env.demo CHANGED
@@ -1,4 +1,4 @@
1
  LILAC_DATA_PATH='/data'
2
  HF_HOME='/data/.huggingface'
3
- HF_DATASETS_CACHE='/data/.cache'
4
  TRANSFORMERS_CACHE='/data/.cache'
 
 
1
  LILAC_DATA_PATH='/data'
2
  HF_HOME='/data/.huggingface'
 
3
  TRANSFORMERS_CACHE='/data/.cache'
4
+ XDG_CACHE_HOME='/data/.cache'
.gitattributes DELETED
@@ -1,7 +0,0 @@
1
- data/datasets/local/spotify/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
2
- data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
3
- data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
4
- data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
5
- data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl filter=lfs diff=lfs merge=lfs -text
6
- data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy filter=lfs diff=lfs merge=lfs -text
7
- data/concept/lilac/profanity/openai.pkl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
data/concept/lilac/legal-termination/concept.json DELETED
@@ -1,184 +0,0 @@
1
- {
2
- "namespace": "lilac",
3
- "concept_name": "legal-termination",
4
- "type": "text",
5
- "data": {
6
- "731b1338cf1949958c3526c555f88058": {
7
- "label": true,
8
- "text": "In the event that any provision of this agreement is found to be unenforceable, the remaining provisions shall continue to be valid and binding.",
9
- "id": "731b1338cf1949958c3526c555f88058"
10
- },
11
- "99a20e547e38474dbc24507a1658d0c9": {
12
- "label": true,
13
- "text": "The parties agree that in the event of a natural disaster or other unforeseen event, both parties will make reasonable efforts to fulfill their obligations under this contract.",
14
- "id": "99a20e547e38474dbc24507a1658d0c9"
15
- },
16
- "3f27b47c526a4c5896a0a100024535c7": {
17
- "label": true,
18
- "text": "If any party breaches the terms of this agreement, the non-breaching party shall have the right to seek legal remedies.",
19
- "id": "3f27b47c526a4c5896a0a100024535c7"
20
- },
21
- "d403dbb1ab9c4594bc7f7dcb0ad5b333": {
22
- "label": true,
23
- "text": "This lease agreement shall survive the termination or expiration of the lease term, and continue to be binding upon the parties.",
24
- "id": "d403dbb1ab9c4594bc7f7dcb0ad5b333"
25
- },
26
- "b7deba9f7e80444abe14448f53f45c43": {
27
- "label": true,
28
- "text": "In the event of a dispute arising from this contract, the parties agree to first attempt to resolve the dispute through mediation before pursuing any legal action.",
29
- "id": "b7deba9f7e80444abe14448f53f45c43"
30
- },
31
- "a82231b490174e62aad733cb0c75024d": {
32
- "label": true,
33
- "text": "This Agreement may be terminated, and the transactions contemplated hereby may be abandoned, at any time prior to the Effective Time, whether prior to or after the Company Stockholders' Approval:",
34
- "id": "a82231b490174e62aad733cb0c75024d"
35
- },
36
- "160b25dbf14e4759a0065bbd652ce33f": {
37
- "label": true,
38
- "text": "This Agreement may be terminated and abandoned at any time prior to the Effective Time of the Merger, whether before or after the Company Stockholder Approval:",
39
- "id": "160b25dbf14e4759a0065bbd652ce33f"
40
- },
41
- "8f5f9f96b16441228bb0c9b8a14c4e25": {
42
- "label": false,
43
- "text": "any jurisdiction, then such provision shall, as to such jurisdiction, be modified or restricted to the extent necessary to make such provision valid, binding and enforceable, or if such provision cannot be so modified or restricted, then such provision shall, as to such jurisdiction, be deemed to be excised from this Agreement; provided, however, that the legality, binding effect and",
44
- "id": "8f5f9f96b16441228bb0c9b8a14c4e25"
45
- },
46
- "87b6c31b04a346b4a3e0da8d2cc5a7ac": {
47
- "label": true,
48
- "text": "This Agreement shall terminate automatically without any further action by any party hereto upon the earliest to occur of (a) the Effective Time of the Merger, (b) the termination of the Merger Agreement in accordance with its terms and (c) any amendment or other modification of the Merger Agreement that reduces the amount of the Merger Consideration or provides that the Merger Consideration shall",
49
- "id": "87b6c31b04a346b4a3e0da8d2cc5a7ac"
50
- },
51
- "985344f7ecfb41f4a69ba101973221a1": {
52
- "label": false,
53
- "text": " During the Employment Period, the Corporation shall pay ----------- the Executive a base salary which, as of the commencement of the Employment Period, shall be at an annual rate of Two Hundred Fifty Thousand Dollars ($250,000). The base salary shall be payable in equal periodic installments which are not less frequent than the periodic installments in effect for salaries of other senior",
54
- "id": "985344f7ecfb41f4a69ba101973221a1"
55
- },
56
- "5d53ff48376046fdab41e95c7f4bad54": {
57
- "label": true,
58
- "text": "This Agreement may be terminated at any time prior to the Closing Date solely:",
59
- "id": "5d53ff48376046fdab41e95c7f4bad54"
60
- },
61
- "bdeb785be2154b21b4eb052466fa9bcb": {
62
- "label": true,
63
- "text": "(a) This Agreement may be terminated by you by notice to the Company at any time prior to the Closing Date if any of the following has occurred: (i) since the respective dates as of which information is given in the Registration Statement and the Prospectus, any material adverse change or any development involving a prospective material adverse change in or affecting the earnings, busi ness,",
64
- "id": "bdeb785be2154b21b4eb052466fa9bcb"
65
- },
66
- "fe6871e9070441f8a9e4b3db26b077d7": {
67
- "label": true,
68
- "text": "Section 3(b), this Section 7 and Section 8 of this Agreement shall survive a termination of this Agreement pursuant to (a) or (b) above in this Section 7 until the date that is two years following the date of such termination. Notwithstanding anything else to the contrary contained herein or in the Merger Agreement, if the Effective Time occurs, the representations and warranties contained in",
69
- "id": "fe6871e9070441f8a9e4b3db26b077d7"
70
- },
71
- "bf1a51751d0748e58c344aec8e5fc789": {
72
- "label": false,
73
- "text": "This Agreement may be executed in one or more counterparts (including counterparts executed and delivered by facsimile, which shall be as counterparts executed and delivered manually), all of which shall be considered one and the same agreement and shall become effective when one or more counterparts have been signed by each of the parties and delivered to the other party, it being understood that",
74
- "id": "bf1a51751d0748e58c344aec8e5fc789"
75
- },
76
- "bc1b2affa6d848fd92d4dee033e30659": {
77
- "label": false,
78
- "text": "would, in your judgment, make it impracticable or inadvisable to market the Units or to enforce contracts for the sale of the Units, (iii) suspension of trading in securities generally on the New York Stock Exchange, the American Stock Exchange or the Nasdaq National Market or limitation on prices (other than limitations on hours or numbers of days of trading) for securities on any such Exchange,",
79
- "id": "bc1b2affa6d848fd92d4dee033e30659"
80
- },
81
- "67a73d5887f74a91bed190ca8f64b17c": {
82
- "label": false,
83
- "text": " The authorized capital stock of FM consists of 1,000 shares of Common Stock, no par value each, of which 1,000 shares are issued and outstanding. There are no outstanding or authorized options, warrants, calls, subscriptions, rights (including any preemptive rights or rights of first refusal), agreements or commitments of any character obligating FM to issue any stock or any other Equity",
84
- "id": "67a73d5887f74a91bed190ca8f64b17c"
85
- },
86
- "025b2ca5147849c8a921d9aaa31cd9cd": {
87
- "label": false,
88
- "text": "Taxes that are being contested in good faith by appropriate proceedings, provided that Holdings, the Borrower or Restricted Subsidiary, as the case may be, has set aside on its books adequate reserves therefor in accordance with GAAP.",
89
- "id": "025b2ca5147849c8a921d9aaa31cd9cd"
90
- },
91
- "76acff27f13743f4822a094c707d8b75": {
92
- "label": false,
93
- "text": "have been a suspension or material limitation in trading in the Company\u2019s common stock on the New York Stock Exchange; (iii) there shall have been a general moratorium on commercial banking activities declared by either federal or New York state authorities or a material disruption in commercial banking or securities settlement or clearance services in the United States; (iv) there shall have been",
94
- "id": "76acff27f13743f4822a094c707d8b75"
95
- },
96
- "b11a95c0eb564445b1a473e90622f861": {
97
- "label": true,
98
- "text": "10.1. This Agreement will terminate:",
99
- "id": "b11a95c0eb564445b1a473e90622f861"
100
- },
101
- "d536428a02084d94ba18d412851cb913": {
102
- "label": false,
103
- "text": "may not be limited to his Base Salary and that the Employee may receive an annual bonus in the amount, if any, determined annually by the Employer. The Employee shall also participate in employee compensation and benefit plans available generally to executives of the Employer (including, without limitation, any tax-qualified profit sharing plan, nonqualified profit sharing plan, life insurance",
104
- "id": "d536428a02084d94ba18d412851cb913"
105
- },
106
- "368bb1d9c7d0419d9ca58f28565eeb2e": {
107
- "label": true,
108
- "text": "This Agreement may be terminated in the absolute discretion of the Representatives, by notice to the Bank, if after execution and delivery of this Agreement and prior to the Closing Date (i) there has been, since the date of this Agreement or since the respective dates as of which information is given in the Registration Statement, the Time of Sale Information or the Prospectus, any material",
109
- "id": "368bb1d9c7d0419d9ca58f28565eeb2e"
110
- },
111
- "1b5fd7b037a84404bf85c858953c79e8": {
112
- "label": true,
113
- "text": "however, (i) the right to terminate this Agreement under this Section 8 shall not be available to such Buyer if the failure of the transactions contemplated by this Agreement to have been consummated by such date is the result of such Buyer\u2019s breach of this Agreement and (ii) the abandonment of the sale and purchase of the Notes and the Warrants shall be applicable only to such Buyer providing",
114
- "id": "1b5fd7b037a84404bf85c858953c79e8"
115
- },
116
- "6d5a23d2663f457cab96df03d9dc8ab7": {
117
- "label": true,
118
- "text": "In addition, any Stockholder may terminate this Agreement if Weatherford, WEUS, or the Company breaches any representation, warranty, covenant or other agreement contained in the Merger Agreement that (A) would give rise to the failure of Weatherford, WEUS, or the Company to satisfy any condition set forth in Section 8.2(a) thereof, and (B) cannot be or has not been cured within 45 days after the",
119
- "id": "6d5a23d2663f457cab96df03d9dc8ab7"
120
- },
121
- "4a8223a48f83491b9b3eafd7ad37baf9": {
122
- "label": true,
123
- "text": "The obligations of the Underwriters hereunder may be terminated by the Representatives, in their absolute discretion, by notice given to and received by the Depositor or the Bank prior to delivery of and payment for the Notes if, prior to that time, any of the events described in Section 5(v) shall have occurred or any of the other conditions described in Section 5 shall not be satisfied.",
124
- "id": "4a8223a48f83491b9b3eafd7ad37baf9"
125
- },
126
- "fbb152eae00c440bb2d0df0fbd82c262": {
127
- "label": true,
128
- "text": "Either of the parties hereto may terminate this Agreement by giving to the other party a notice in writing specifying the date of such termination, which shall be not less than 60 days after the date of receipt of such notice. In the event such notice is given by the Customer, it shall be accompanied by a copy of a resolution of the Board of Directors of the Customer, certified by its Secretary,",
129
- "id": "fbb152eae00c440bb2d0df0fbd82c262"
130
- },
131
- "1d21880f426c45ada31409d22815cc87": {
132
- "label": false,
133
- "text": "Prospectus or the Final Prospectus (exclusive of any amendment or supplement thereof or thereto after the date hereof).",
134
- "id": "1d21880f426c45ada31409d22815cc87"
135
- },
136
- "795cac72a3504740bc7401a84fc6fba4": {
137
- "label": true,
138
- "text": "This Agreement may be terminated by the Customer or the Bank by giving ninety (90) days written notice to the other, provided that such notice to the Bank shall specify the names of the persons to whom the Bank shall deliver the Assets in the Accounts. If notice of termination is given by the Bank, the Customer shall, within ninety (90) days following receipt of the notice, deliver to the Bank Instructions specifying the names of the persons to whom the Bank shall deliver the Assets.",
139
- "id": "795cac72a3504740bc7401a84fc6fba4"
140
- },
141
- "3b82e6eba4894ac0b9f7f12aba2aab2e": {
142
- "label": false,
143
- "text": "of this Agreement, or to Authorized Persons, or may continue to hold the Assets until Instructions are provided to the Bank.",
144
- "id": "3b82e6eba4894ac0b9f7f12aba2aab2e"
145
- },
146
- "da16bd0e9dce4d4c87400eab61b9b14c": {
147
- "label": false,
148
- "text": "into force of the Convention. In such event, the Convention shall cease to have effect:",
149
- "id": "da16bd0e9dce4d4c87400eab61b9b14c"
150
- },
151
- "02cc328109984db094b0b02caec0d575": {
152
- "label": true,
153
- "text": "Survival. The rights and obligations contained in Sections 3 (\u201cOwnership of Work Product\u201d), 4 (\u201cOther Rights\u201d), 5 (\u201cLicense to Preexisting IP\u201d), 6 (\u201cRepresentations and Warranties\u201d), 8 (\u201cConfidential Information\u201d) and 12 (\u201cNon-solicitation\u201d) will survive any termination or expiration of this Agreement. ",
154
- "id": "02cc328109984db094b0b02caec0d575"
155
- },
156
- "f8edf65d9acf4ff4a04459a3492ac426": {
157
- "label": false,
158
- "text": "Severability. Should any provisions of this Agreement be held by a court of law to be illegal, invalid or unenforceable, the legality, validity and enforceability of the remaining provisions of this Agreement will not be affected or impaired thereby. ",
159
- "id": "f8edf65d9acf4ff4a04459a3492ac426"
160
- },
161
- "5a8517f359494ead8c11b6aff440480d": {
162
- "label": false,
163
- "text": "\u0095\tCommitted to deliver the best, we leave no room for customer grievances.\r\n\r\n",
164
- "id": "5a8517f359494ead8c11b6aff440480d"
165
- },
166
- "a47d327d0f6e46fc861f86b2e0e54a2f": {
167
- "label": false,
168
- "text": "the due diligence and using our agreement creator to close the deal successfully. \r",
169
- "id": "a47d327d0f6e46fc861f86b2e0e54a2f"
170
- },
171
- "811d0dcc92e14c5c881e903c7d4ff7b6": {
172
- "label": false,
173
- "text": "in accordance with customary procedures in the relevant markets, but in any event for a settlement period no longer than three months following the date of such commitment.",
174
- "id": "811d0dcc92e14c5c881e903c7d4ff7b6"
175
- },
176
- "907f92e0d5704418944a559a4bfb96c7": {
177
- "label": false,
178
- "text": "terminate in accordance with Section 2 of the Investors\u2019 Rights Agreement.",
179
- "id": "907f92e0d5704418944a559a4bfb96c7"
180
- }
181
- },
182
- "version": 33,
183
- "description": "Termination or survival clause in a legal document"
184
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/concept/lilac/legal-termination/sbert.pkl DELETED
Binary file (33.8 kB)
 
data/concept/lilac/negative-sentiment/concept.json DELETED
@@ -1,634 +0,0 @@
1
- {
2
- "namespace": "lilac",
3
- "concept_name": "negative-sentiment",
4
- "type": "text",
5
- "data": {
6
- "0": {
7
- "label": true,
8
- "text": "Starting To Be Annoyed By Becky...: I'm not sure why I keep reading these books, but I guess it's because I've read the first two so I'll keep reading the rest of the books. In the first book, I really found it amusing. I was a little annoyed by the fact that Becky couldn't stop spending, but then again that's why she is called a Shopaholic. In the second book, I felt more of the same it was just magniifed more. Now in the third book, I'm just down right annoyed by Becky Bloomwood. In this book, she wasn't going on crazy shopping sprees, just planning two different weddings because she was afraid to tell each person and because I feel she's really selfish. Still, I read the book because I wanted to see how she could get herself out of another situation. I will say that I love her friends Suze and Danny, her client Laurel and her husband Luke. Maybe that's why I keep reading. I will read the next book, but I'm sure I'll be just as annoyed when I'm done.",
9
- "id": "0"
10
- },
11
- "1": {
12
- "label": true,
13
- "text": "the cover is fine - the pool is horrible: The entire pool was horrible. The cover was fine once we got it on, but we finally got rid of the pool after 2 weeks because it was so hard to set up and keep clean.",
14
- "id": "1"
15
- },
16
- "2": {
17
- "label": false,
18
- "text": "Good album, not their best.: This album is probably the most marketable and radio friendly of all of dashboard's albums. For the peripheral listener it may be the right one to get to introduce you to this band. But as a Dashboard fan of 5 or so years I truly hope they return to their original sound for their next work. Not for the listen-ability but for the show. To this day the fans react best to the songs from \"Places\" or \"A Mark, A Mission.\" I recommend this album to everyone but I also recommend any and all of their other work.",
19
- "id": "2"
20
- },
21
- "3": {
22
- "label": true,
23
- "text": "This is a horror novel, right?: Never one to pass up any vampire novel, I purchased Sips because the description seemed interesting. Vampires, Marquis de Sade, fetishism, yada yada yada. If this is a comedy, I give it 4 stars; however, I'll give it 1 star as a horror novel. Sade was rather boring; I would think a character as intense and multi-faceted as the Marquis de Sade would make for a more interesting vampire. The writing style isn't too bad, but overall I found the characters to be mildly amusing at best. The plot was thin, the end was anti-climactic, and the vampires were not very frightening. The book had little suspense, and it leaves a mile-wide opening for a sequel at the conclusion. I would, however, like to see something more of the vampire mutants lurking in the graveyard. They were the most riveting of any of the characters.",
24
- "id": "3"
25
- },
26
- "4": {
27
- "label": false,
28
- "text": "Superb mix of global non secular musical denominations: I first heard Ms. Pook's music on the \"Eyes Wide Shut\" soundtrack (the masquerade ball scene) and was blown away; if ever there was a necessity for music to permeate a scene in a film this was it. She incorporates a blend of the traditional songs from faiths across continents and mixes them, for lack of a better comparison than similar to your quintessential raver d.j. (though these are better and definitively more original :) \"Oppenheimer\" is my favorite, and if you let the last track run for a few minutes a portion of the song will play once more. I can't wait to hear more of her stuff - these hymns are awesome.",
29
- "id": "4"
30
- },
31
- "5": {
32
- "label": false,
33
- "text": "A moving and suspenseful story!: For anyone familiar with the occult, this book is likely to raise hairs on the back of your neck as you read. Even if you're not, the storyline is suspenseful and fascinating, and the characters evoke great sympathy and admiration. An excellent read.",
34
- "id": "5"
35
- },
36
- "6": {
37
- "label": false,
38
- "text": "Simple & Easy to Use - A Practical approach to eating out: This guide is extremely to use. It provides sample menus that you'd see at Chinese, Indian and Thai restaurants. Then you are provided with descriptions of each dish and how it is prepared and the ingredients used. From there you are provided with specific considerations as to how the preparation or ingredient list may affect you if you have Gluten or Allergen issues.This book is the size of a passport and very organized and well written. The Chinese, Indian and Thai Cuisine Passport is perfect for making choices while traveling, or while dining at your favorite local restaurant.",
39
- "id": "6"
40
- },
41
- "7": {
42
- "label": true,
43
- "text": "Being Fair....I am a fan of hers: and I really enjoyed her previous works, more than I could have ever imagined, but this record is horrible. The songs are trite, the lyrics are incredibly boring, indulgent and immature. The music is pop staple, with forgetable melodies and repetative chorus lines, I feel as if the studio wrote the entire album for her while she was sleeping, this just doesn't speak to any of her previous works at all. This album fits on the same shelf with a Nickelodeon-themed CD. Instead of heading in the direction of an artist like Alanis Morrisette, she is going backward and joining the ranks of Hannah Montana and the Naked Brothers Band. She is a great artist and her first two records are amazing. She is better than this CD and I am looking forward to her next effort.",
44
- "id": "7"
45
- },
46
- "8": {
47
- "label": true,
48
- "text": "Sucked: I thought the DVD sucked tremendously. It was very boring and if I could, I would return it for a refund. There was only one \"small\" clip of Dylan himself. I'm very disappointed.",
49
- "id": "8"
50
- },
51
- "9": {
52
- "label": false,
53
- "text": "Excellent product: Easy to install. If you have a newer furnace you probably do not need the swail switch as the HE220A comes with a Humistat which can be connected to the furnace. They recommend the Honeywell 32005847-001 Installation Kit, Bypass which is a little pricey and you can probably buy the pieces of this kit cheaper individually from Home Depot or Lowes or ACO as well as the filters.",
54
- "id": "9"
55
- },
56
- "10": {
57
- "label": false,
58
- "text": "Very happy.: I am very happy with this trashcan. I was unable to find one in the stores to fit the space in my cabinet, but this one does the job. It is very sturdy and looks like it will put up with years of use.",
59
- "id": "10"
60
- },
61
- "11": {
62
- "label": true,
63
- "text": "These aren't Throughbreds!: This makes me so mad. All these new authors are coming and changing the series. Nothings the same anymore and the plots are repeditive. Don't even bother reading these books until #32 these are like a different series. I don't know excactly what's happing but these new authors suck!",
64
- "id": "11"
65
- },
66
- "12": {
67
- "label": true,
68
- "text": "Large and slow are a bad combination.: I bought this TV and returned it a week later, because it blurred so badly with motion that sports were unwatchable. I ended up buying a smaller Sony XBR4, and I have none of the issues (plus the picture is far, far better).This has nothing to do with 60 vs 120Hz. That is more important for DVDs and Blu-Ray signals that are 24fps (which doesn't divide evenly into 60 but does for 120). The LT52133 has an 8ms response time, which is extremely slow. A decent LCD should be 5 or lower.If you want an LCD, choose speed and quality over size. If you want size and quality but want to spend less, buy a plasma. Don't buy a big, cheap, slow LCD!I gave it 2 stars because I like the interface and remote.",
69
- "id": "12"
70
- },
71
- "13": {
72
- "label": true,
73
- "text": "Skip it: This movie is very poorly written and the result is not distressing, just lame. The actors do their best but from very early on it is obvious that the material gives them nothing to work with. Fans of Colin Firth will experience a certain dim level of enjoyment. Minnie Driver is a treat but her character is no better written than the others. Vermont locations are worth something. With one or two moments of exception it's neither comedic nor romantic.",
74
- "id": "13"
75
- },
76
- "14": {
77
- "label": false,
78
- "text": "Belive it i went to the concert?: hi everyone let me tell you i went to the concert i was amazed with what i saw cher was awsome i tell you buy the dvd. as i sat in front of the stage cher was doing a great job to us the she is living proof . So i urge you to buy it?",
79
- "id": "14"
80
- },
81
- "15": {
82
- "label": false,
83
- "text": "Vale la pena.: En este libro se narra de una forma muy interesante la vida de una familia en particular. Lo que mas me gusto de este libro fue la manera en que la autora describe a lo largo del libro las personalidades de los sujetos envueltos en la novela; que vienen a ser muy distintos y extremos, lo cual, intensifica el drama... Definitivamente es un buen libro y lo recomiendo a todos.",
84
- "id": "15"
85
- },
86
- "16": {
87
- "label": false,
88
- "text": "Nummie Children's story: I ordered this book for our grandchildren. Two boys 5 & 3 and a 4 month old girl. All love the story. The mouse is determined.",
89
- "id": "16"
90
- },
91
- "17": {
92
- "label": true,
93
- "text": "Seem to be alone on this one: Looking at the other reviews, I seem to be the only one that was disappointed with this book. The content is too babyish in most of it for older tweens and the more \"grown up\" content would be over a younger tween's head. I had a quick read through and with every paged turned, I thought duh. I'll be looking around for another book shortly.",
94
- "id": "17"
95
- },
96
- "18": {
97
- "label": false,
98
- "text": "Best yet: by far the best EA game yet. I especially like the easy controls and kick - a graphics. the playbook is extremely accurate and detailed. Also the fight songs and cheerleaders were a nice touch. this is an excellent game and worth checking out.",
99
- "id": "18"
100
- },
101
- "19": {
102
- "label": true,
103
- "text": "washed out: A bit like Simply Reds version of the Valentine bros hit \"Moneys too tight to mention\" - this cover version has nothing of the driving energy that characterised the original recording.",
104
- "id": "19"
105
- },
106
- "20": {
107
- "label": false,
108
- "text": "great water bottle: I love this bottle it is great. I like knowing it is non toxic and it just works very well. You can have it full and lay it down and it doesn't leak at all.",
109
- "id": "20"
110
- },
111
- "21": {
112
- "label": false,
113
- "text": "Nice goggles: I am pretty happy with these goggles. They work well during swim workouts in the pool. I do notice a little bit of fogging from time to time. I had hoped to wear them during an upcoming triathlon, but based on a few instances where they slipped a little in the pool I am concerned that they won't be secure enough. I will keep using them in the pool, but will likely get different ones for open water races.",
114
- "id": "21"
115
- },
116
- "22": {
117
- "label": true,
118
- "text": "aaahhh nnnoooooo!: Hopefully the last film in one of the worst horror trilogys ever made. This series pretty much ruined the horror film for years to come, for one its too self aware, thats incredibley annoying, second everyone acts like they are on Friends or some sitcom. The acting is just plain bad and unconvincing. Now the gore, if you're going with material this weak you should load it up with disgusting violence, is there any in the Scream series? No.Everyone went to see this movie just to see who THE KILLER is. This movie sets low standards to be met, you expect alot of people to die, one shock, then we find out who the killer is, then you go home. Every horror film being made today is like that, there's nothing new or exciting or risk taking, its the same stuff over and over and people are laping it up like dog food.This film is what you've come to expect, bad acting, some people die and we eventually find out who the killer is and all is merry and well. Pathetic.",
119
- "id": "22"
120
- },
121
- "23": {
122
- "label": false,
123
- "text": "A classic of its kind: This movie is a classic of its kind and much better that a lot of movies, that followed. It is not one of the best, but it still deserves five stars...",
124
- "id": "23"
125
- },
126
- "24": {
127
- "label": true,
128
- "text": "Nice suite, but Virtual PC 7 disappoints on my G5: I purchased the upgrade since I'd already bought both Office v.X and Virtual PC 6.1 last year.The biggest letdown is that Microsoft's promised support for the G5 is nearly non-existent. I have a dual processor G5 with an ATI Radeon 9800 card (Apple), and after trying to install Virtual PC 7 three times, I cannot get a VM to work. It did install (and work) flawlessly on my G4 Powerbook. Googling for reviews finds it's very hit or miss, but if (when) it misses, you'll regret investing the extra $$$ in an immature product.",
129
- "id": "24"
130
- },
131
- "25": {
132
- "label": true,
133
- "text": "Okay player, don't expect a miracle: I bought this DVD player at Circuit City earlier this yr for about a $100. I hooked it up to a 47\" Vizio LCD (which by the way has an awesome picture) using a HDMI cable. After fine tuning this product, I was very, very, very diasppointed. The picture was very \"grainy\" (lots of pixels). I have a $35 DVD player that only utilizes an s-video cable that produces a much more crisp picture. Be warned, the picture stinks.",
134
- "id": "25"
135
- },
136
- "26": {
137
- "label": false,
138
- "text": "A revelation of the science of consciousness evolution and all natural growth: Here is a readable and fascinating account of the development of the new science of chaos theory, the only body of ideas that describes how the natural world as experienced by human beings emerges out of basic quantum processes. The different explorers and innovators of the new science are introduced in a personable way that will enchant the interested reader.",
139
- "id": "26"
140
- },
141
- "27": {
142
- "label": true,
143
- "text": "Don't say that I didn't warn ya' !: I'm absolutely convinced that Delbert McClinton had no controlover the release of this CD. I rated it 1 star simplybecause there is no 0 star rating ! In actuality , I am not certain that the vocalist on this recording IS Delbert McClinton. Only on the Mr. Pitiful track is there any similarity at all to Delbert's voice. This is the perfect CD for someone with money to burn who would like to have a recording of a 1960's garage band recorded in a garage and who should be working in a garage ! Delbert fans...run fast and run far away from this ! END",
144
- "id": "27"
145
- },
146
- "28": {
147
- "label": true,
148
- "text": "This item is not available: I ordered this unit on February 7th. Every time I checked back on the status of the order, it read \"not shipped\" and the estimated shipping date got moved out. I really don't think this unit is avaialble from the company anytime soon. I cancelled the order.",
149
- "id": "28"
150
- },
151
- "29": {
152
- "label": true,
153
- "text": "I used to like ABBA...: I used to like ABBA, until I saw Mama Mia! A horribly disjointed musical, where songs feel contrived to fit into the story; a story that doesn't seem to come together. Individual songs are usually done alright, but don't segue from one to another very well.The cast butchered several of the songs, but especially S.O.S, Take A Chance On Me, and anything where Pierce Brosnan sang. On a side note, I also counted at least two violations of Chekov's Gun. And finally, I think it has a bad moral message. Which you only recognize if you manage to sit through the whole thing.If there is justice in the world, cast members without established careers won't get to have them as punishment for the worst movies I've seen since The Talented Mr. Ripley.",
154
- "id": "29"
155
- },
156
- "30": {
157
- "label": true,
158
- "text": "A complete disaster!: If you're like me, you probably wanted to check out this movie because it sounded like it really could be an excellent supernatural Gothic horror tale full of goblins and wicked things alike. Well, don't make the same mistake I did and actually watch it. It's horrible. Terrible. An honest to goodness waste of film. The acting is wretched, the film quality is rotten (it actually looks twenty years older than it is), and the plot is thin, weak, and does not give you what it's supposed to. The only reason I bothered to give this film 1 star is because of Alexis Arquette -- he's great looking, but should have left this film out of his career.",
159
- "id": "30"
160
- },
161
- "31": {
162
- "label": false,
163
- "text": "beautiful detail: I just purchased these Dover COloring Books for my mother and she loves them. The detail is out of this world and the variety of colors you can use are only limited by your inagination. HIGHLY RECOMMENDED!",
164
- "id": "31"
165
- },
166
- "32": {
167
- "label": true,
168
- "text": "Very disappointed: I looked forward to getting this movie as I had heard many good things about it but it was nothing like I had imagined or been led to believe. There is very little actual history in it or real Christian experience except for the background because the main focus is a soap opera style romance and caricature figures. I agree with the reviewer who described it as a mixture of \"tawdry Hollywood sex\" somehow interspersed with a vague nod to Christianity. The only decent scene was the arena scene where the Christians are going to their deaths singing hymns - but that's not enough to make it a great or even a good movie. Not personally to my taste anyway.",
169
- "id": "32"
170
- },
171
- "33": {
172
- "label": true,
173
- "text": "Unreliable minikit: I bought this minikit because it got good reviews and it would be perfect for my purposes. However it switches on and off whenever it wants, it looses contact with the phone. Very often the on/off button works only in a horizontal position (?) I use a Treo 650, which is on the compatible phone list. When I contacted Parrot, they said it wasn't (?) At last I opened the unit, but there are no moving parts inside except the micro switches. It is giving me a headache, so I will go searching for an alternative.",
174
- "id": "33"
175
- },
176
- "34": {
177
- "label": false,
178
- "text": "A Christmas Classic!: This is surely one of the best classical Christmas recordings available. Don't buy the older version, as the quality of this recording is excellent. This is one of those \"Every Christmas - Can't have Christmas without\" recordings.",
179
- "id": "34"
180
- },
181
- "35": {
182
- "label": true,
183
- "text": "too narrow: These were the narrowest pair of D size shoes I have ever tried on. I don't care how nice a shoe looks. If it don't fit it just don't fit.",
184
- "id": "35"
185
- },
186
- "36": {
187
- "label": true,
188
- "text": "Lack of extension: This earphones lack a descent extension cord. ITs very small cable, but its of good quality. Sadly, cord its too short, and the extension is useless.",
189
- "id": "36"
190
- },
191
- "37": {
192
- "label": false,
193
- "text": "Easy-Reading: This is the 3rd Southern Sisters Mystery I've read. They're easy, fast and funny murder mysteries, with lots of cute family stories intertwined in the intrigue.",
194
- "id": "37"
195
- },
196
- "38": {
197
- "label": true,
198
- "text": "it'd be great if it worked like it was supposed to: for the first 30 seconds it was lovely, but i believe that either the motor isn't powerful enough to keep the shaft rotating smoothly or 3 AA batteries just don't provide enough juice for the motor to work more than 30 seconds. it was a nice idea, but i'm rather dissapointed. the jelly material is somewhat difficult to maintain also. i think if it were hooked up to a larger battery pack it'd be WONDERFUL... which i think i may have a macgyver friend with a knack for electronics attempt to do for me.",
199
- "id": "38"
200
- },
201
- "39": {
202
- "label": false,
203
- "text": "Not Hornby's best but still good: I loved About a Boy and really, really loved the sardonic wit of High Fidelity. About a Boy is much deeper but just as cynical. Maybe even more so. The characters are richly drawn and just complex enough to keep the reader wanting more. Good read, but best to take some time with this one. Not recommended for a summer beach read.",
204
- "id": "39"
205
- },
206
- "40": {
207
- "label": true,
208
- "text": "A Disappointment: As with most Taunton Press publications, the illustrations and photographs in this book are spectacular and the organization and layout is superb. Nonetheless, I found this book disappointing. It lacks both depth and breadth. I had hoped for a detailed review of wood joinery including some of the more unusual joinery found in Japanese woodworking. This book, however, is targeted more toward the beginner. Even so, it does not cover the details and \"tricks\" of even the most basic techniques in sufficient detail to allow beginners to easily reproduce them. Consequently, it is unclear who this book was written for - not the beginner as it lacks depth, and not the advanced woodworker as it lacks breadth. Far more effort appears to have been put into appearance and organization than in content.",
209
- "id": "40"
210
- },
211
- "41": {
212
- "label": true,
213
- "text": "Horrible. Don't do it!: Great price for the item when a 6' one of these at Best Buy is $20. Thing is, the one from Best Buy fits in the outlet and stays there. This cord fits very loose and does not connect. I bought 2 of them, neither did what they were suppose to.As much as I hate to say it, but, buy the more expensive one. At least it works.",
214
- "id": "41"
215
- },
216
- "42": {
217
- "label": false,
218
- "text": "Given as a gift...: Given to my best friend as a gift. She loves it. Her fiance enjoys making coffee for her in the mornings. :)",
219
- "id": "42"
220
- },
221
- "43": {
222
- "label": false,
223
- "text": "Love the ring.: This is a nice ring. I was worried it out be thin and cheap looking, but it's not. It's a very pretty stylish ring. Go for it.",
224
- "id": "43"
225
- },
226
- "44": {
227
- "label": true,
228
- "text": "Beautiful writing Marred by One-Note Characterizations: How could Kingsolver have ruined her book with such an obvious error? Nathan is a strident paper doll that flattens the whole story. Just as bad, the author has all the narrators using the same ironic tone to decribe him, deadening their voices as well. At the same time, Kingsolver doesn't have the guts to show him doing something trully terrible. I don't trust an author who can't let the reader make up his own mind, and as a consequence I couldn't trust her views about ANYTHING in the story. I'm giving this two stars for her descriptions of the African landscape, and that is all.",
229
- "id": "44"
230
- },
231
- "45": {
232
- "label": true,
233
- "text": "Much worse than any cordless phone I've ever had: This phone cuts out only 2 rooms away from the base station. There is static noise, and callers on the other end complain about sound quality. I can't go into the garden, which used to be no problem with my old 900 MHz phone.",
234
- "id": "45"
235
- },
236
- "46": {
237
- "label": true,
238
- "text": "Waste of time & money: The first Hangover was not too bad, this one was just terrible. The acting is bad, the script is bad, everything about this movie was just bad. Do yourself a favor, don't buy this movie as it is a total waste of time and money.",
239
- "id": "46"
240
- },
241
- "47": {
242
- "label": true,
243
- "text": "Did Not Work For Me!: Impressive You Tube Video (Like a Sci-Fi Fantasy). In reality it's a high speed Easy Out so unsurprisingly it broke faster than an Easy out. This product did not work for me. The drill part did not drlil, the puller part did not pull. It was a total zero.",
244
- "id": "47"
245
- },
246
- "48": {
247
- "label": false,
248
- "text": "Excellent book, long overdue.: From a very long time women were told that looking good was of utmost importance. This was without regard to health or fitness and how age affected these parameters. Witness the whalebone and other types of corsets, the spike heeled shoes and the numerous weight loss programmes on the market (some of which are downright dangerous). Now there is a book, backed by solid research, that allows women of all ages to remain fit and healthy for a lifetime. I am certainly going to recommend this book to all the women I know.Bentley Norville",
249
- "id": "48"
250
- },
251
- "49": {
252
- "label": true,
253
- "text": "not an all star: Not a practical guide in this collecting age. Does NOT have a comprehensive list; meaning it does NOT cover all manufacturers and, more importantly, for the ones it does, only provides listings of the base set. That means no insert or variation pricing whatsoever. Also, no oddball or minor league issues are listed. Generally speaking, unless you are collecting base sets prior to the advent of inserts and alternate versions of the base set, this guide is fairly useless.",
254
- "id": "49"
255
- },
256
- "50": {
257
- "label": true,
258
- "text": "Again, second rate city, third rate writer: Just another example of Mr. Lindberg's pitiful attempt at exhibiting a strong expertise on a subject with which he is clearly obsessed. Don't waste your time with this book, either. It is poorly written and fails to engage the reader. You might consider using this book and the first book he wrote on the same subject, as a pair of bookends. That is about all they are worth.",
259
- "id": "50"
260
- },
261
- "51": {
262
- "label": false,
263
- "text": "Reality: No one should need to convince you to buy this book, you should just do it! It's so well written and worded and brings you right to the heart of a sexual reality that most people like to pretend doesn't really live and breath in their fair cities. I never again want to hear someone bad mouth a working girl for what she does. I will and do now however look at men with a curious eye wondering if they are depraved peep show window lickers :)",
264
- "id": "51"
265
- },
266
- "52": {
267
- "label": true,
268
- "text": "Bummer: Visual effects and Battle footage were great...the other 85% of the movie was just lousy fluff...",
269
- "id": "52"
270
- },
271
- "53": {
272
- "label": false,
273
- "text": "The spark of idependence: Filled with the independent spark that made us all love life at one point or another. A fun, introspective and nonsensical movie that sticks with you.",
274
- "id": "53"
275
- },
276
- "54": {
277
- "label": false,
278
- "text": "What I expected from Mirman's website. Funny. Funny. Russian.: lol, gotta love Eugene. Even when his audience doesn't initially laugh, he gets in a good zinger at himself and they laugh at that. He's witty without being condescending, and uncomplicated without seeing contrived. However, if you're not a fan of irreverant humor, this may not be for you.",
279
- "id": "54"
280
- },
281
- "55": {
282
- "label": true,
283
- "text": "Do not...repeat...do not bother!: It is not often that I offer a negative review but this compilation while attractive does not deliver at all.The foot massage gizmo is awkward and uncomfortable.The pumice stone leaves rough splinter like skin.The foot scrub doesn't reall scrub.The rotary action tool has five heads, none of which work well and you must hold the switch in place or it turns off. It is cumbersome and ineffective.The one star was initially given for a foot brush (which later lost its bristles very easily as I update the review) and a sweet smelling foot repair balm.Don't waist your money. Soak your feet and invest in an inexpensive German Titania file, smooth and coarser side, or a like product. It will last for years.",
284
- "id": "55"
285
- },
286
- "56": {
287
- "label": true,
288
- "text": "Not Sandra's Best: Ms. Brown has written better romance novels. Don't give up on her if this was your first Sandra book.The feeble female lead struggles with a 15-year crush that walks back into her life. The smug male lead acts like a jerk through most of the novel. The romance scenes grapple to muster up passion but fall short. Both of the main characters bothered me; my favorite character was the 17-year old.A quick read...about 4 hours (with interruptions) for me...but probably not worth it.",
289
- "id": "56"
290
- },
291
- "57": {
292
- "label": false,
293
- "text": "Impressed: Lots-O-Fun. Wood and glass toys are high quality and are a good fall back for the kids to play with they are \"bored\". Would buy again.",
294
- "id": "57"
295
- },
296
- "58": {
297
- "label": true,
298
- "text": "Light turned on by itself 3 times: The installation was easy. I used it for a week, everything worked fine, EXCEPT the light it connected to turned on by itself 3 times so far, with no one near to either one of the switch. Not sure whether it is a defective unit, or this product is too sensitive to noise. I'm returning this product and will just install a regular switch instead.",
299
- "id": "58"
300
- },
301
- "59": {
302
- "label": false,
303
- "text": "good battery: I feel kind of silly writing a review for a battery, but have to say that these last a LONG time. Work very well.",
304
- "id": "59"
305
- },
306
- "60": {
307
- "label": false,
308
- "text": "Even a Woman finds it funny: Yes, even a woman finds \"Married to Mommy\" funny. The book gets you laughing aloud when it is trying to make fun of \"Mommies\". The truth is that it really is making fun of the stupidity of men and their simple basic needs of sex, getting out of work, and beer. Of course, the truth is always funny.A definite MUST for any woman, married or not. We will now know all the secret tricks the men try to use on us.By the way, I am NOT a MOMMY!",
309
- "id": "60"
310
- },
311
- "61": {
312
- "label": false,
313
- "text": "Gungrave...not quite what you might expect: Those thinking this is another version of Trigun will be disappointed. Gungrave is actually a lot deeper and more complex. The lead is short on dialouge, but the story has more depth and character development than most anime. The first DVD is more about the main character's past than about the reanimated killing machine he's become, but it definitely leaves you wanting more.",
314
- "id": "61"
315
- },
316
- "62": {
317
- "label": false,
318
- "text": "Error in product description: It's great in every way. However, if you'd prefer a digital tuner (as I do), then you might need to look further. The product description boasts a digital AM/FM tuner, but it's disappointingly an analog AM/FM tuner.Overall - especially for the price - I think it's pretty good.",
319
- "id": "62"
320
- },
321
- "63": {
322
- "label": false,
323
- "text": "good phone but not as user friendly as it could be: Battery life is very good. Phone has good range. My only complaint is it's to involved to get your message from the handset.",
324
- "id": "63"
325
- },
326
- "64": {
327
- "label": true,
328
- "text": "Big waste of money (and space in my house!): My 5 year old son wanted this so bad, but when we got it for him, there were so many pieces to put together that didn't fit together well, he never played with it. It just sits on our floor in many pieces taking up toy space! What a waste!",
329
- "id": "64"
330
- },
331
- "65": {
332
- "label": false,
333
- "text": "Don't want to take it off: Very satisfied with an earlier purchase of this Bali bra model, I was just as pleased with the new one. Very comfortable, well made and a good neutral color. It will be my next choice, too.",
334
- "id": "65"
335
- },
336
- "66": {
337
- "label": false,
338
- "text": "Fantastico: If anybody who's into rock music is ever looking for a band to keep you on your toes, this is the band. I've been a fan for 10 years now, and no album has ever sounded like any of their previous albums. This disc is fantastic with such a variety of styles, as are the previous releases, even back to the Rainbow Butt Monkey days.",
339
- "id": "66"
340
- },
341
- "67": {
342
- "label": true,
343
- "text": "too much visual: There are far too much designs, visuals, colors, etc in the book - this is highly distracting, as TV screen can be...By way of example (among so many...), what is the use of colors with the three squares of the Pyth. theorem???? this is as useless as writting 2+3=5 with 2 in blue, 3 in red and 5 in yellow...I wish I had purchased the 2nd edition, which according to reviews was closer to what I was looking for.",
344
- "id": "67"
345
- },
346
- "68": {
347
- "label": false,
348
- "text": "Aretha's First Arista Release Showed Pleasures to Come: After a long and musically satisfying career with Atlantic, Aretha severed her ties with that company and moved under the wing of Arista's Clive Davis. With the start of the 1980's, Aretha was looking for new territory to conquer and almost succeeded with this mixed bag.\"United Together\" is a fine tune that benefits from beautiful orchestral arrangement that is matched by Aretha's superb vocal instrument. The remake of \"Can't Turn You Loose\" allows Aretha to show why she is the Queen of Soul\" for she really belts this one out. Another cover, that of the Doobies' \"What a Fool Believes,\" is an interesting interpretation. The final cut \"School Days\" appears to be \"autobiographical\" for every girl growing up in the fifties.Although not as strong as her Atlantic work, \"Aretha\" is still a suitable addition to the artist's discography.",
349
- "id": "68"
350
- },
351
- "69": {
352
- "label": true,
353
- "text": "Misguided Purchase: The photo and description do not reflect the product. The screen panel kit I received was white. What a huge inconvenience during a time-crunch.",
354
- "id": "69"
355
- },
356
- "70": {
357
- "label": true,
358
- "text": "Banacek: My husband and were looking forward to seeing this series.The first show was SO boring, we finally just quit watching it.Actually, we haven't gotten around to watching anymore. I guess we were afraid of a repeat.Maybe that was just once, I hope!",
359
- "id": "70"
360
- },
361
- "71": {
362
- "label": false,
363
- "text": "JDT: Uncle Tupelo is without doubt one of the most under appreciated groups of the 90's. Anodyne, like each of the three albums that came before it, has everything that a remarkable recording requires: great songs, honest lyrics, and artists who really care about the music they are making. Like the best of Dylan and Springsteen, the songs are about real people with real troubles and joys. When you hear them you know they are coming from the heart. The songs contributed by Jay Farrar and Jeff Tweedy are easily differentiated by the voacls, music, and lyrics. What makes this record interesting is how well these unique sounds compliment each other. The union is seamless.",
364
- "id": "71"
365
- },
366
- "72": {
367
- "label": false,
368
- "text": "Well Worth Reading: First a confession: Miriam Wasserman was my mother. However, she published several books, but this is the only one I really found useful. She walks the reader through the New York City school system and the attitudes of different groups involved in the system back in the 1960s. This includes parents, teachers and administrators. Her view is that the further away one got from parents and students, the more prestige one had. She meticulously describes the teachers' strike of 1968 against \"community control of schools\", a strike of which she is extremely critical. She explores the racism that was involved in this strike, including using quotes from striking teachers, etc. It should be emphasized that the author was pro-union all her life, so her views don't stem from an anti-union bias. The book also covers the high school student rebellion which coincided with and followed the strike.",
369
- "id": "72"
370
- },
371
- "73": {
372
- "label": false,
373
- "text": "compact and loaded: I bought this phone after reading the cnet reviews and really liked it. It looks small and really compact. I like the camera pics at 2 mega pixel and bright flash. The mp3 player is crisp. The headset that comes along delvers amazing fM radio. I think my phone is not very loud and you have a problem when you are around a noisy crowd. I just bought this phone again for my cousin. He likes it too. Almost forgot the display is very good.",
374
- "id": "73"
375
- },
376
- "74": {
377
- "label": false,
378
- "text": "Outstanding text!: Brooks/Cole should keep this text in their catalog for ages! It is well-written, examples are generally quite clear, vocabulary is introduced well, and the exercises develop real skills, rather than simply be busy-work. One of the best calculus books ever!",
379
- "id": "74"
380
- },
381
- "75": {
382
- "label": false,
383
- "text": "Excel 2003 Bible: Very good source of information. I will most likely buy other books in this series.",
384
- "id": "75"
385
- },
386
- "76": {
387
- "label": false,
388
- "text": "Tasting is Believing: Gluten-free breads used to have a gritty texture from the rice flour, and were too soft for sandwiches. Bette Hagman uses garbanzo/fava bean flour, sorghum flour, tapioca flour, and corn starch to create breads which have a similar texture to wheat flour breads, and the flavors of her breads are fabulous.My BF bought me this book and a great tasting beverage to drink it with. Since he knows I quit coffee recently, he's been really wonderful helping me in cope with my mood swings. S o y f e e is made from soy beans that is roasted just like coffee. I enjoy the taste and don't miss coffee one bit. Buy it online at www.s o y c o f fee.com.This is a 'must have' for anyone baking gluten-free. I think all of Bette Hagman's books are wonderful and a must for those with gluten intolerance.",
389
- "id": "76"
390
- },
391
- "77": {
392
- "label": false,
393
- "text": "5 stars for the show, no stars for the \"Collector's Edition\": I was really looking forward to getting this Collector's Edition and see what extras were added. I knew it wasn't a lot - just a mini-book and a documentary - but I figured it would be packaged in a cool way.Wrong.As others have already mentioned, the Collector's Edition is *literally* theAvatar: The Last Airbender - The Complete Book 1 Collectionslipped into another cardboard box, with a little booklet and DVD in an envelope (not even a case!) wedged in. It's really disappointing; it would have been so easy to create a quality Collector's Edition but the studio couldn't be bothered, I guess.",
394
- "id": "77"
395
- },
396
- "78": {
397
- "label": false,
398
- "text": "sula scottcampos: Sula, a book that talks about the issues of being a black women is a really good novel to read.One of the reasons I recommend it is because of its realism and its themes - death, sex, friendship and poverty.I also think that its characters are very good, its easy to identify with one or both of them. I really recommend this book to anyone who enjoys good literature.",
399
- "id": "78"
400
- },
401
- "79": {
402
- "label": false,
403
- "text": "Fantastic! It's a must-have for girls!: I hated razor, tried shaving but it did not work for me. Shaving made the hair grows thicker and faster afterwards, plus the roots are impossible to be getting rid of. After reading the reviews, I ordered it to try, I used it for once and already fall in love with this. I used to use small tweezer to pluck out my leg's hair, in order to avoid the razor, it took me a few hours to do that but this super electronic tweezer works wonder! You won't see the black roots and I have smooth and silkly legs in 20 mins. It does not hurt at all, if you use it on your legs. But, if you use it at your under arm, it won't be a pleasant feeling, of course! I will never use anything else besides this for hair removing anymore! highly recommended!",
404
- "id": "79"
405
- },
406
- "80": {
407
- "label": true,
408
- "text": "This is not a toy: I guess I was expecting more out of these leave window decals. I just didn't find them attractive after placing them on my window, they seem very cheap, I guess because they are cheap.I threw them away.",
409
- "id": "80"
410
- },
411
- "81": {
412
- "label": false,
413
- "text": "Wonderful book for anyone running a professional hatchery: This book is aimed more for hatcheries that are raising Trout, Salmon, Catfish and other food fishes. However, there is so much information in this book that even ornamental fish hatcheries will find an incredible amount of useful information. The chapters on Fish Nutrition are especially helpful.",
414
- "id": "81"
415
- },
416
- "82": {
417
- "label": false,
418
- "text": "Amazing book!!: Once again, Eric Victorino's artistic talent is put into this great free-verse poetry book. I couldn't put it down and I finished it the day I received it in the mail. All of the poems are awesome but the one I found the most interesting was \"It's A People Business.\" All of the experiences in his life, personally and with his band, come to life in this book. Please check it out! It's worth every penny!!",
419
- "id": "82"
420
- },
421
- "83": {
422
- "label": false,
423
- "text": "The white trumpet contender respect Miles Davis!: The story of the Jazz in the Fifties certainly would be remain unfinished without the ominous presence of this outstanding virtuoso. Baker sound still possesses this alluring hook, this magnetic engagement charm, eloquent expressiveness, enrapturing lyricism and contagious rhythm, despite the elapsed time, which confirms by itself the status of his musicianship.This selection is jus a little sample of the broad universe of his genius. A well thought selection of great musical successes, available, preserved and immortalized by the Digital Technology for our future enjoyment.Absolutely indispensable in your treasured collection.",
424
- "id": "83"
425
- },
426
- "84": {
427
- "label": true,
428
- "text": "What the?: I'm sorry, maybe it's just me but I can't helping stating that this has to be one of the wrost movies I've seen in my life!Can you say boring? Can you say doesn't make sense at all? The first 30 minutes of the movie were O.K. But it went downhill after that. This movie is a prime example of a director attempting to make a deep movie with a meaningful lesson but failed on all levels. I don't recommend this movie unless you want to go to sleep or you don't have anything else to do.",
429
- "id": "84"
430
- },
431
- "85": {
432
- "label": false,
433
- "text": "very very good!!!!: linda blair is a young girl who is possessed. and her mother doesn't know what to do until one day when she hears her daughter screaming and stabbind herself she knows what to do GET AN EXORCIZIM!!!",
434
- "id": "85"
435
- },
436
- "86": {
437
- "label": false,
438
- "text": "Awesome product for the price!: This range extender works as advertised! I am very happy with the purchase. I was a little worried after reading some of the horror stories here, but I have to say, Chovy's review instructions (on this site) were just this ticket to get the repeater up and running in less than 30 minutes. It was unbelievably easy to install! Do not be frightened by negative reviews. If you can set up a wireless network, you can set up this repeater. However, I did upgrade the firmware before I did anything else and maybe that helped. I got the firmware update from the Belkin site.",
439
- "id": "86"
440
- },
441
- "87": {
442
- "label": true,
443
- "text": "Slight: This book is either a heavily illustrated short story collection or a text-heavy comic. Its unusual format is its most original feature. Its plots are negligible, but its illustrations and text evoke a unique atmosphere of self-conscious nonconformism. Although its target audience is dare-to-be-different teens and college students, its interesting turns of phrase and expressive line drawings are not devoid of interest for general audences.",
444
- "id": "87"
445
- },
446
- "88": {
447
- "label": false,
448
- "text": "ANgeleyes: Seem to dry up their eyes fairly well, although I haven't seen the color (brown stain) change much yet.",
449
- "id": "88"
450
- },
451
- "89": {
452
- "label": true,
453
- "text": "Nice Try: Salt Lake 2002 is not a bad game, but it isn't good either. The graphics are excellent, but some of the events are bad. Bobsleigh, and skiing aren't bad but the others are. You dont stay into it for long. I liked it for a while, but it gets boring.",
454
- "id": "89"
455
- },
456
- "90": {
457
- "label": true,
458
- "text": "Cutler's share of the pie: This book was a major disappointment. I am familiar with books written solely by the Dalai Lama, such as the \"Library of Tibet\" series, which are much more engrossing and have much more substance than Cutler's book. Cutler attempts (successfully, sadly) to have his share of the profitable market that involves the Dalai Lama's writings. The book is insipid, does not try to explain any important issue in the light of Buddhist philosophy, and only rehashes issues that several other westerners already wrote about. It's another big ego trip: we keep hearing time and again about his opportunities to be with the Dalai Lama. What a shame, Cutler. I sold the book as soon as I finished it.",
459
- "id": "90"
460
- },
461
- "91": {
462
- "label": true,
463
- "text": "Mostly tedious, with interesting parts: I found the writing interesting, and the subject fascinating, but I found myself frustrated by the author's difficulty in talking directly about the status of Muslim women with her interview subjects. The author spent many pages writing about the menus and dress of the many middle and upper-middle class women she interviewed. It seemed as though her interview subjects resisted her efforts to discuss the status of women in their countries, so we too as readers had to wade through much distracting material and misunderstandings about feminism and gender. Great travel stories, but not a great source of information about Muslim women.",
464
- "id": "91"
465
- },
466
- "92": {
467
- "label": true,
468
- "text": "Sesame Street Toddler: I did not find this game to be as educationally sound as I would expect from Sesame street. There is too much talking before the program will react to a command. The graphics are jerky and the cursor acts like the target is magnetically charged and keeps pushing away the cursor. When the child actually does manage to click on a target, the cursor may still fly to another target and the child is told that his answer is wrong. Another example of educational problems is the pronunciation of \"eggs\" using a long \"a\" sound instead of a short \"e.\" This is not very helpful in teaching a child the sound for short \"e.\" Children that are used to playing computer games by themselves may find that this game is too frustrating to do alone. The open ended learning curve is a great idea. I just wish Sesame Street would hire a truly qualified literacy expert to help clean up the many problems in this program.",
469
- "id": "92"
470
- },
471
- "93": {
472
- "label": true,
473
- "text": "needs a buzz cut and a point: I avoided reading this book, not because of the hermaphrodite subject matter, but because I have never read a multigenerational family saga that I liked. Many books let me down in the middle, and this was no exception. The beginning of the book was incredible and harrowing, with momentum and characterization. The post-America nextgens part of the saga was so boring I found myself flipping and flipping - always a bad sign. If there was some kind of larger point to all of that, then I must have missed it. Yes there's the identity duality and trinity themes playing out here: man/woman, greek/turkish/american modern/old world sick/healthy innocent/guilty original/reinvented. But it was almost as if the author was saying - here it is again - get it? I like my fiction much more subtle than this.",
474
- "id": "93"
475
- },
476
- "94": {
477
- "label": true,
478
- "text": "OMG! DO NOT BUY!: I normally don't take the time to submit a review.In this case however, I feel obligated to do so.This is by far one of the worst purchases I have ever made.Here's why.....The contraption is far too bulky.The case's enclosing is unbearable, takes a good minute or so to open it.The texture of the material feels like a cheap toy.The overall design is horrible, something I could make in my basement.For the love of everything sacred, do not buy this thing.",
479
- "id": "94"
480
- },
481
- "95": {
482
- "label": false,
483
- "text": "Good price, good quality: Comparable HDMI cables can be bought for 45 or more. Even though the price is cheap the quality is good, no problems so far.",
484
- "id": "95"
485
- },
486
- "96": {
487
- "label": false,
488
- "text": "Good rock music: This is what i call rock music good beat and good lyrics, don't listen to the other reviews. This cd is one of the best, listen to a few songs and you will get hooked. I recommend this cd its awesome.",
489
- "id": "96"
490
- },
491
- "97": {
492
- "label": true,
493
- "text": "BORING!: This movie is soo boring. How in the hell did this movie make so much at the box office. Do people really want to pay for crappy movies like this. bottom line this is a chick flick nothing is good. And now they are re-releasing this movie with more boring stuff. This is the worst movie ever.",
494
- "id": "97"
495
- },
496
- "98": {
497
- "label": true,
498
- "text": "Already Rusting: Inferior quality. The plating is thin and rust is coming through the finish. Inexcusable for a product that is designed for use in a humid environment.",
499
- "id": "98"
500
- },
501
- "99": {
502
- "label": true,
503
- "text": "confusing internet setup: i wanted a camera that could email photos but this camera will not go out through the router and the manual setup , to punch a hole thru router is confusing.",
504
- "id": "99"
505
- },
506
- "04c7dfc0f94e4e88968d09b40edbfa14": {
507
- "label": true,
508
- "text": "The new gaming console is unaffordable.",
509
- "id": "04c7dfc0f94e4e88968d09b40edbfa14"
510
- },
511
- "58f58a1a4cbb4bb699772ed934006ec8": {
512
- "label": true,
513
- "text": "How can it be sure difficult for @115830 to deliver a package to a University address? Two failed attempts so far ...",
514
- "id": "58f58a1a4cbb4bb699772ed934006ec8"
515
- },
516
- "d4a3cd4877c54aef81c376eff8008df4": {
517
- "label": false,
518
- "text": "@204780 Glad they showed up! Hope you have a great flight! -Sean",
519
- "id": "d4a3cd4877c54aef81c376eff8008df4"
520
- },
521
- "affe1d6548f84bed84238bac45cc10a1": {
522
- "label": false,
523
- "text": "@British_Airways Thank you! All looks good then \ud83c\uddec\ud83c\udde7\u2708\ufe0f",
524
- "id": "affe1d6548f84bed84238bac45cc10a1"
525
- },
526
- "e304ea77a94c450a95690c7b605a035f": {
527
- "label": false,
528
- "text": "@246667 Thank you for reaching out, Andrea. The built in application in Windows 10 are exempted to be uninstalled. However, you can send this suggestion directly to our developers via the Feedback Hub so they can take a look at it: https://t.co/jowrfbgQm6. Keep in touch.",
529
- "id": "e304ea77a94c450a95690c7b605a035f"
530
- },
531
- "76b694b019eb4e6888a422e144030bd0": {
532
- "label": true,
533
- "text": "@GWRHelp It\u2019s mainly the constant short forming and cancellations due to mechanical faults Phil. As a company, these excuses have been used ad nauseam for years and years. It just gets worse and no amount of rhetoric and IET self promotion can hide that fact.",
534
- "id": "76b694b019eb4e6888a422e144030bd0"
535
- },
536
- "ce0698020b7a457396c7674b04db10e6": {
537
- "label": false,
538
- "text": "English gangster flick.",
539
- "id": "ce0698020b7a457396c7674b04db10e6"
540
- },
541
- "52bda6cbab224899845e66e0474cdefc": {
542
- "label": false,
543
- "text": "sees the formula graph, the chip calculates the formula, able to \"survive\" thanks to its connection to Edit, develops a parallel personality and affords her abilities greater than she ever imagined...",
544
- "id": "52bda6cbab224899845e66e0474cdefc"
545
- },
546
- "435aabe68c294963a05e090d479582bc": {
547
- "label": false,
548
- "text": "Aanandam is a 2016 Indian Malayalam campus musical film written and directed by Ganesh Raj in his directorial debut. Vineeth Sreenivasan produces the film under the banner of Habit Of Life with Vinod Shornur under Cast N Crew.",
549
- "id": "435aabe68c294963a05e090d479582bc"
550
- },
551
- "f96313d0087e4941a359783634ef9e86": {
552
- "label": false,
553
- "text": "The remarkable story of The Weather Underground, radical activists of the 1970s, and of radical politics at its best and most disastrous.",
554
- "id": "f96313d0087e4941a359783634ef9e86"
555
- },
556
- "f63e4502791a409fa2d750687d3841eb": {
557
- "label": false,
558
- "text": "A young widow on a trip to the backwoods stumbles upon the operation of a gang of drug smugglers. They attempt to kill her in order to keep their operation a secret, but she turns out to be more resourceful than they thought, and starts to turn the tables on them.",
559
- "id": "f63e4502791a409fa2d750687d3841eb"
560
- },
561
- "108ac02949324b02bdcbe4c7a77bacdc": {
562
- "label": false,
563
- "text": "The story of a young Marine, fresh from Camp Pendleton, who is forced to confront the complexities of adulthood and a volatile home life during a four-day Thanksgiving leave.",
564
- "id": "108ac02949324b02bdcbe4c7a77bacdc"
565
- },
566
- "44fc412246964b2393fa0035ff093a00": {
567
- "label": false,
568
- "text": "Exploring the rough and tumble world of hockey, Academy Award winner Alex Gibney (\"Taxi to the Dark Side\") looks at the world of the NHL enforcers and specifically the career of Chris \"Knuckles\" Nilan who helped the Montreal Canadiens win the Stanley Cup.",
569
- "id": "44fc412246964b2393fa0035ff093a00"
570
- },
571
- "409350c111af4ba3a94c842b797ddb95": {
572
- "label": false,
573
- "text": "Two fishing fanatics get in trouble when their fishing boat gets stolen while on a trip.",
574
- "id": "409350c111af4ba3a94c842b797ddb95"
575
- },
576
- "d48d8f3b5a524ecea69bae718d1f1513": {
577
- "label": false,
578
- "text": "A willful young boy follows his just as obstinate grandmother in a journey across Iraq, determined to discover the fate of her missing son, Ahmed's father, who never returned from war.",
579
- "id": "d48d8f3b5a524ecea69bae718d1f1513"
580
- },
581
- "283e96de5b474240a044c50dbc2551fb": {
582
- "label": false,
583
- "text": "A group of people are sitting in a theatre watching a movie when one realises that the woman on the screen is her. (IMDb)",
584
- "id": "283e96de5b474240a044c50dbc2551fb"
585
- },
586
- "516d0f2f3a854a97a87c64db19a89fac": {
587
- "label": false,
588
- "text": "of the fake prediction. Fantastic swashbuckling adventures in a 18th century setting, with a light criticism of the war and the mighty.",
589
- "id": "516d0f2f3a854a97a87c64db19a89fac"
590
- },
591
- "c2f55710669b40aa937625fe0ab04065": {
592
- "label": false,
593
- "text": "famous for his reputation as a Don Juan, to seduce C\u00e9cile and emotionally destroy her. While on his mission, Valmont gets sidetracked when he goes to visit his aunt and falls for Madame Tourvel, a virtuous, married woman who knows of his womanizing ways, but that only makes the challenge more exciting to Valmont. Together, Madame de Merteuil and Valmont make a dangerous team and they will stop at nothing when it comes to matters of the heart.",
594
- "id": "c2f55710669b40aa937625fe0ab04065"
595
- },
596
- "ba0261b2ee3244d29bb3a8c6d77195a6": {
597
- "label": false,
598
- "text": "sees the formula graph, the chip calculates the formula, able to \"survive\" thanks to its connection to Edit, develops a parallel personality and affords her abilities greater than she ever imagined...",
599
- "id": "ba0261b2ee3244d29bb3a8c6d77195a6"
600
- },
601
- "5e724fbde8ee44d9a8fc87a6e6667f01": {
602
- "label": false,
603
- "text": "telling the story about people who despite all obstacles strive for their goal.",
604
- "id": "5e724fbde8ee44d9a8fc87a6e6667f01"
605
- },
606
- "557eba5ebfc9467a9d88688afed41354": {
607
- "label": false,
608
- "text": "A young playboy who learns he has one month until he becomes infertile sets out to procreate as much as possible.",
609
- "id": "557eba5ebfc9467a9d88688afed41354"
610
- },
611
- "aa20e22fbe96487d8ee1223a6ef4da0b": {
612
- "label": false,
613
- "text": "Set in modern times, Alex finds King Arthur's sword Excalibur and must prove himself worthy of it.",
614
- "id": "aa20e22fbe96487d8ee1223a6ef4da0b"
615
- },
616
- "bea56d34f6df408c9ec9653b17a90a93": {
617
- "label": false,
618
- "text": "Kostis is a 40-year-old doctor that finds himself in the small island of Antiparos, in order to take over the local clinic. His whole life and routine will turn upside down when he meets an international group of young and beautiful tourists and he falls in love with Anna, a 19-year-old goddess.",
619
- "id": "bea56d34f6df408c9ec9653b17a90a93"
620
- },
621
- "e61a3251720d425c9f4770cb4b11d2d9": {
622
- "label": false,
623
- "text": "Friends on a weekend excursion take a path into a forest that leads to death and destruction.",
624
- "id": "e61a3251720d425c9f4770cb4b11d2d9"
625
- },
626
- "5471008376cf44518f2ff1f67f057c08": {
627
- "label": false,
628
- "text": "Mr Bournelis suggested all 30 lineal metres of blockwork should be removed and replaced, which would require removing and reinstalling the fence. The total cost of his suggested method of rectification was said to be $14,650 for each unit, giving a total cost of rectification of $29,300.",
629
- "id": "5471008376cf44518f2ff1f67f057c08"
630
- }
631
- },
632
- "version": 27,
633
- "description": "Negative sentiment"
634
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/concept/lilac/negative-sentiment/sbert.pkl DELETED
Binary file (106 kB)
 
data/concept/lilac/positive-sentiment/concept.json DELETED
@@ -1,564 +0,0 @@
1
- {
2
- "namespace": "lilac",
3
- "concept_name": "positive-sentiment",
4
- "type": "text",
5
- "data": {
6
- "0": {
7
- "label": false,
8
- "text": "Starting To Be Annoyed By Becky...: I'm not sure why I keep reading these books, but I guess it's because I've read the first two so I'll keep reading the rest of the books. In the first book, I really found it amusing. I was a little annoyed by the fact that Becky couldn't stop spending, but then again that's why she is called a Shopaholic. In the second book, I felt more of the same it was just magniifed more. Now in the third book, I'm just down right annoyed by Becky Bloomwood. In this book, she wasn't going on crazy shopping sprees, just planning two different weddings because she was afraid to tell each person and because I feel she's really selfish. Still, I read the book because I wanted to see how she could get herself out of another situation. I will say that I love her friends Suze and Danny, her client Laurel and her husband Luke. Maybe that's why I keep reading. I will read the next book, but I'm sure I'll be just as annoyed when I'm done.",
9
- "id": "0"
10
- },
11
- "1": {
12
- "label": false,
13
- "text": "the cover is fine - the pool is horrible: The entire pool was horrible. The cover was fine once we got it on, but we finally got rid of the pool after 2 weeks because it was so hard to set up and keep clean.",
14
- "id": "1"
15
- },
16
- "2": {
17
- "label": true,
18
- "text": "Good album, not their best.: This album is probably the most marketable and radio friendly of all of dashboard's albums. For the peripheral listener it may be the right one to get to introduce you to this band. But as a Dashboard fan of 5 or so years I truly hope they return to their original sound for their next work. Not for the listen-ability but for the show. To this day the fans react best to the songs from \"Places\" or \"A Mark, A Mission.\" I recommend this album to everyone but I also recommend any and all of their other work.",
19
- "id": "2"
20
- },
21
- "3": {
22
- "label": false,
23
- "text": "This is a horror novel, right?: Never one to pass up any vampire novel, I purchased Sips because the description seemed interesting. Vampires, Marquis de Sade, fetishism, yada yada yada. If this is a comedy, I give it 4 stars; however, I'll give it 1 star as a horror novel. Sade was rather boring; I would think a character as intense and multi-faceted as the Marquis de Sade would make for a more interesting vampire. The writing style isn't too bad, but overall I found the characters to be mildly amusing at best. The plot was thin, the end was anti-climactic, and the vampires were not very frightening. The book had little suspense, and it leaves a mile-wide opening for a sequel at the conclusion. I would, however, like to see something more of the vampire mutants lurking in the graveyard. They were the most riveting of any of the characters.",
24
- "id": "3"
25
- },
26
- "4": {
27
- "label": true,
28
- "text": "Superb mix of global non secular musical denominations: I first heard Ms. Pook's music on the \"Eyes Wide Shut\" soundtrack (the masquerade ball scene) and was blown away; if ever there was a necessity for music to permeate a scene in a film this was it. She incorporates a blend of the traditional songs from faiths across continents and mixes them, for lack of a better comparison than similar to your quintessential raver d.j. (though these are better and definitively more original :) \"Oppenheimer\" is my favorite, and if you let the last track run for a few minutes a portion of the song will play once more. I can't wait to hear more of her stuff - these hymns are awesome.",
29
- "id": "4"
30
- },
31
- "5": {
32
- "label": true,
33
- "text": "A moving and suspenseful story!: For anyone familiar with the occult, this book is likely to raise hairs on the back of your neck as you read. Even if you're not, the storyline is suspenseful and fascinating, and the characters evoke great sympathy and admiration. An excellent read.",
34
- "id": "5"
35
- },
36
- "6": {
37
- "label": true,
38
- "text": "Simple & Easy to Use - A Practical approach to eating out: This guide is extremely to use. It provides sample menus that you'd see at Chinese, Indian and Thai restaurants. Then you are provided with descriptions of each dish and how it is prepared and the ingredients used. From there you are provided with specific considerations as to how the preparation or ingredient list may affect you if you have Gluten or Allergen issues.This book is the size of a passport and very organized and well written. The Chinese, Indian and Thai Cuisine Passport is perfect for making choices while traveling, or while dining at your favorite local restaurant.",
39
- "id": "6"
40
- },
41
- "7": {
42
- "label": false,
43
- "text": "Being Fair....I am a fan of hers: and I really enjoyed her previous works, more than I could have ever imagined, but this record is horrible. The songs are trite, the lyrics are incredibly boring, indulgent and immature. The music is pop staple, with forgetable melodies and repetative chorus lines, I feel as if the studio wrote the entire album for her while she was sleeping, this just doesn't speak to any of her previous works at all. This album fits on the same shelf with a Nickelodeon-themed CD. Instead of heading in the direction of an artist like Alanis Morrisette, she is going backward and joining the ranks of Hannah Montana and the Naked Brothers Band. She is a great artist and her first two records are amazing. She is better than this CD and I am looking forward to her next effort.",
44
- "id": "7"
45
- },
46
- "8": {
47
- "label": false,
48
- "text": "Sucked: I thought the DVD sucked tremendously. It was very boring and if I could, I would return it for a refund. There was only one \"small\" clip of Dylan himself. I'm very disappointed.",
49
- "id": "8"
50
- },
51
- "9": {
52
- "label": true,
53
- "text": "Excellent product: Easy to install. If you have a newer furnace you probably do not need the swail switch as the HE220A comes with a Humistat which can be connected to the furnace. They recommend the Honeywell 32005847-001 Installation Kit, Bypass which is a little pricey and you can probably buy the pieces of this kit cheaper individually from Home Depot or Lowes or ACO as well as the filters.",
54
- "id": "9"
55
- },
56
- "10": {
57
- "label": true,
58
- "text": "Very happy.: I am very happy with this trashcan. I was unable to find one in the stores to fit the space in my cabinet, but this one does the job. It is very sturdy and looks like it will put up with years of use.",
59
- "id": "10"
60
- },
61
- "11": {
62
- "label": false,
63
- "text": "These aren't Throughbreds!: This makes me so mad. All these new authors are coming and changing the series. Nothings the same anymore and the plots are repeditive. Don't even bother reading these books until #32 these are like a different series. I don't know excactly what's happing but these new authors suck!",
64
- "id": "11"
65
- },
66
- "12": {
67
- "label": false,
68
- "text": "Large and slow are a bad combination.: I bought this TV and returned it a week later, because it blurred so badly with motion that sports were unwatchable. I ended up buying a smaller Sony XBR4, and I have none of the issues (plus the picture is far, far better).This has nothing to do with 60 vs 120Hz. That is more important for DVDs and Blu-Ray signals that are 24fps (which doesn't divide evenly into 60 but does for 120). The LT52133 has an 8ms response time, which is extremely slow. A decent LCD should be 5 or lower.If you want an LCD, choose speed and quality over size. If you want size and quality but want to spend less, buy a plasma. Don't buy a big, cheap, slow LCD!I gave it 2 stars because I like the interface and remote.",
69
- "id": "12"
70
- },
71
- "13": {
72
- "label": false,
73
- "text": "Skip it: This movie is very poorly written and the result is not distressing, just lame. The actors do their best but from very early on it is obvious that the material gives them nothing to work with. Fans of Colin Firth will experience a certain dim level of enjoyment. Minnie Driver is a treat but her character is no better written than the others. Vermont locations are worth something. With one or two moments of exception it's neither comedic nor romantic.",
74
- "id": "13"
75
- },
76
- "14": {
77
- "label": true,
78
- "text": "Belive it i went to the concert?: hi everyone let me tell you i went to the concert i was amazed with what i saw cher was awsome i tell you buy the dvd. as i sat in front of the stage cher was doing a great job to us the she is living proof . So i urge you to buy it?",
79
- "id": "14"
80
- },
81
- "15": {
82
- "label": true,
83
- "text": "Vale la pena.: En este libro se narra de una forma muy interesante la vida de una familia en particular. Lo que mas me gusto de este libro fue la manera en que la autora describe a lo largo del libro las personalidades de los sujetos envueltos en la novela; que vienen a ser muy distintos y extremos, lo cual, intensifica el drama... Definitivamente es un buen libro y lo recomiendo a todos.",
84
- "id": "15"
85
- },
86
- "16": {
87
- "label": true,
88
- "text": "Nummie Children's story: I ordered this book for our grandchildren. Two boys 5 & 3 and a 4 month old girl. All love the story. The mouse is determined.",
89
- "id": "16"
90
- },
91
- "17": {
92
- "label": false,
93
- "text": "Seem to be alone on this one: Looking at the other reviews, I seem to be the only one that was disappointed with this book. The content is too babyish in most of it for older tweens and the more \"grown up\" content would be over a younger tween's head. I had a quick read through and with every paged turned, I thought duh. I'll be looking around for another book shortly.",
94
- "id": "17"
95
- },
96
- "18": {
97
- "label": true,
98
- "text": "Best yet: by far the best EA game yet. I especially like the easy controls and kick - a graphics. the playbook is extremely accurate and detailed. Also the fight songs and cheerleaders were a nice touch. this is an excellent game and worth checking out.",
99
- "id": "18"
100
- },
101
- "19": {
102
- "label": false,
103
- "text": "washed out: A bit like Simply Reds version of the Valentine bros hit \"Moneys too tight to mention\" - this cover version has nothing of the driving energy that characterised the original recording.",
104
- "id": "19"
105
- },
106
- "20": {
107
- "label": true,
108
- "text": "great water bottle: I love this bottle it is great. I like knowing it is non toxic and it just works very well. You can have it full and lay it down and it doesn't leak at all.",
109
- "id": "20"
110
- },
111
- "21": {
112
- "label": true,
113
- "text": "Nice goggles: I am pretty happy with these goggles. They work well during swim workouts in the pool. I do notice a little bit of fogging from time to time. I had hoped to wear them during an upcoming triathlon, but based on a few instances where they slipped a little in the pool I am concerned that they won't be secure enough. I will keep using them in the pool, but will likely get different ones for open water races.",
114
- "id": "21"
115
- },
116
- "22": {
117
- "label": false,
118
- "text": "aaahhh nnnoooooo!: Hopefully the last film in one of the worst horror trilogys ever made. This series pretty much ruined the horror film for years to come, for one its too self aware, thats incredibley annoying, second everyone acts like they are on Friends or some sitcom. The acting is just plain bad and unconvincing. Now the gore, if you're going with material this weak you should load it up with disgusting violence, is there any in the Scream series? No.Everyone went to see this movie just to see who THE KILLER is. This movie sets low standards to be met, you expect alot of people to die, one shock, then we find out who the killer is, then you go home. Every horror film being made today is like that, there's nothing new or exciting or risk taking, its the same stuff over and over and people are laping it up like dog food.This film is what you've come to expect, bad acting, some people die and we eventually find out who the killer is and all is merry and well. Pathetic.",
119
- "id": "22"
120
- },
121
- "23": {
122
- "label": true,
123
- "text": "A classic of its kind: This movie is a classic of its kind and much better that a lot of movies, that followed. It is not one of the best, but it still deserves five stars...",
124
- "id": "23"
125
- },
126
- "24": {
127
- "label": false,
128
- "text": "Nice suite, but Virtual PC 7 disappoints on my G5: I purchased the upgrade since I'd already bought both Office v.X and Virtual PC 6.1 last year.The biggest letdown is that Microsoft's promised support for the G5 is nearly non-existent. I have a dual processor G5 with an ATI Radeon 9800 card (Apple), and after trying to install Virtual PC 7 three times, I cannot get a VM to work. It did install (and work) flawlessly on my G4 Powerbook. Googling for reviews finds it's very hit or miss, but if (when) it misses, you'll regret investing the extra $$$ in an immature product.",
129
- "id": "24"
130
- },
131
- "25": {
132
- "label": false,
133
- "text": "Okay player, don't expect a miracle: I bought this DVD player at Circuit City earlier this yr for about a $100. I hooked it up to a 47\" Vizio LCD (which by the way has an awesome picture) using a HDMI cable. After fine tuning this product, I was very, very, very diasppointed. The picture was very \"grainy\" (lots of pixels). I have a $35 DVD player that only utilizes an s-video cable that produces a much more crisp picture. Be warned, the picture stinks.",
134
- "id": "25"
135
- },
136
- "26": {
137
- "label": true,
138
- "text": "A revelation of the science of consciousness evolution and all natural growth: Here is a readable and fascinating account of the development of the new science of chaos theory, the only body of ideas that describes how the natural world as experienced by human beings emerges out of basic quantum processes. The different explorers and innovators of the new science are introduced in a personable way that will enchant the interested reader.",
139
- "id": "26"
140
- },
141
- "27": {
142
- "label": false,
143
- "text": "Don't say that I didn't warn ya' !: I'm absolutely convinced that Delbert McClinton had no controlover the release of this CD. I rated it 1 star simplybecause there is no 0 star rating ! In actuality , I am not certain that the vocalist on this recording IS Delbert McClinton. Only on the Mr. Pitiful track is there any similarity at all to Delbert's voice. This is the perfect CD for someone with money to burn who would like to have a recording of a 1960's garage band recorded in a garage and who should be working in a garage ! Delbert fans...run fast and run far away from this ! END",
144
- "id": "27"
145
- },
146
- "28": {
147
- "label": false,
148
- "text": "This item is not available: I ordered this unit on February 7th. Every time I checked back on the status of the order, it read \"not shipped\" and the estimated shipping date got moved out. I really don't think this unit is avaialble from the company anytime soon. I cancelled the order.",
149
- "id": "28"
150
- },
151
- "29": {
152
- "label": false,
153
- "text": "I used to like ABBA...: I used to like ABBA, until I saw Mama Mia! A horribly disjointed musical, where songs feel contrived to fit into the story; a story that doesn't seem to come together. Individual songs are usually done alright, but don't segue from one to another very well.The cast butchered several of the songs, but especially S.O.S, Take A Chance On Me, and anything where Pierce Brosnan sang. On a side note, I also counted at least two violations of Chekov's Gun. And finally, I think it has a bad moral message. Which you only recognize if you manage to sit through the whole thing.If there is justice in the world, cast members without established careers won't get to have them as punishment for the worst movies I've seen since The Talented Mr. Ripley.",
154
- "id": "29"
155
- },
156
- "30": {
157
- "label": false,
158
- "text": "A complete disaster!: If you're like me, you probably wanted to check out this movie because it sounded like it really could be an excellent supernatural Gothic horror tale full of goblins and wicked things alike. Well, don't make the same mistake I did and actually watch it. It's horrible. Terrible. An honest to goodness waste of film. The acting is wretched, the film quality is rotten (it actually looks twenty years older than it is), and the plot is thin, weak, and does not give you what it's supposed to. The only reason I bothered to give this film 1 star is because of Alexis Arquette -- he's great looking, but should have left this film out of his career.",
159
- "id": "30"
160
- },
161
- "31": {
162
- "label": true,
163
- "text": "beautiful detail: I just purchased these Dover COloring Books for my mother and she loves them. The detail is out of this world and the variety of colors you can use are only limited by your inagination. HIGHLY RECOMMENDED!",
164
- "id": "31"
165
- },
166
- "32": {
167
- "label": false,
168
- "text": "Very disappointed: I looked forward to getting this movie as I had heard many good things about it but it was nothing like I had imagined or been led to believe. There is very little actual history in it or real Christian experience except for the background because the main focus is a soap opera style romance and caricature figures. I agree with the reviewer who described it as a mixture of \"tawdry Hollywood sex\" somehow interspersed with a vague nod to Christianity. The only decent scene was the arena scene where the Christians are going to their deaths singing hymns - but that's not enough to make it a great or even a good movie. Not personally to my taste anyway.",
169
- "id": "32"
170
- },
171
- "33": {
172
- "label": false,
173
- "text": "Unreliable minikit: I bought this minikit because it got good reviews and it would be perfect for my purposes. However it switches on and off whenever it wants, it looses contact with the phone. Very often the on/off button works only in a horizontal position (?) I use a Treo 650, which is on the compatible phone list. When I contacted Parrot, they said it wasn't (?) At last I opened the unit, but there are no moving parts inside except the micro switches. It is giving me a headache, so I will go searching for an alternative.",
174
- "id": "33"
175
- },
176
- "34": {
177
- "label": true,
178
- "text": "A Christmas Classic!: This is surely one of the best classical Christmas recordings available. Don't buy the older version, as the quality of this recording is excellent. This is one of those \"Every Christmas - Can't have Christmas without\" recordings.",
179
- "id": "34"
180
- },
181
- "35": {
182
- "label": false,
183
- "text": "too narrow: These were the narrowest pair of D size shoes I have ever tried on. I don't care how nice a shoe looks. If it don't fit it just don't fit.",
184
- "id": "35"
185
- },
186
- "36": {
187
- "label": false,
188
- "text": "Lack of extension: This earphones lack a descent extension cord. ITs very small cable, but its of good quality. Sadly, cord its too short, and the extension is useless.",
189
- "id": "36"
190
- },
191
- "37": {
192
- "label": true,
193
- "text": "Easy-Reading: This is the 3rd Southern Sisters Mystery I've read. They're easy, fast and funny murder mysteries, with lots of cute family stories intertwined in the intrigue.",
194
- "id": "37"
195
- },
196
- "38": {
197
- "label": false,
198
- "text": "it'd be great if it worked like it was supposed to: for the first 30 seconds it was lovely, but i believe that either the motor isn't powerful enough to keep the shaft rotating smoothly or 3 AA batteries just don't provide enough juice for the motor to work more than 30 seconds. it was a nice idea, but i'm rather dissapointed. the jelly material is somewhat difficult to maintain also. i think if it were hooked up to a larger battery pack it'd be WONDERFUL... which i think i may have a macgyver friend with a knack for electronics attempt to do for me.",
199
- "id": "38"
200
- },
201
- "39": {
202
- "label": true,
203
- "text": "Not Hornby's best but still good: I loved About a Boy and really, really loved the sardonic wit of High Fidelity. About a Boy is much deeper but just as cynical. Maybe even more so. The characters are richly drawn and just complex enough to keep the reader wanting more. Good read, but best to take some time with this one. Not recommended for a summer beach read.",
204
- "id": "39"
205
- },
206
- "40": {
207
- "label": false,
208
- "text": "A Disappointment: As with most Taunton Press publications, the illustrations and photographs in this book are spectacular and the organization and layout is superb. Nonetheless, I found this book disappointing. It lacks both depth and breadth. I had hoped for a detailed review of wood joinery including some of the more unusual joinery found in Japanese woodworking. This book, however, is targeted more toward the beginner. Even so, it does not cover the details and \"tricks\" of even the most basic techniques in sufficient detail to allow beginners to easily reproduce them. Consequently, it is unclear who this book was written for - not the beginner as it lacks depth, and not the advanced woodworker as it lacks breadth. Far more effort appears to have been put into appearance and organization than in content.",
209
- "id": "40"
210
- },
211
- "41": {
212
- "label": false,
213
- "text": "Horrible. Don't do it!: Great price for the item when a 6' one of these at Best Buy is $20. Thing is, the one from Best Buy fits in the outlet and stays there. This cord fits very loose and does not connect. I bought 2 of them, neither did what they were suppose to.As much as I hate to say it, but, buy the more expensive one. At least it works.",
214
- "id": "41"
215
- },
216
- "42": {
217
- "label": true,
218
- "text": "Given as a gift...: Given to my best friend as a gift. She loves it. Her fiance enjoys making coffee for her in the mornings. :)",
219
- "id": "42"
220
- },
221
- "43": {
222
- "label": true,
223
- "text": "Love the ring.: This is a nice ring. I was worried it out be thin and cheap looking, but it's not. It's a very pretty stylish ring. Go for it.",
224
- "id": "43"
225
- },
226
- "44": {
227
- "label": false,
228
- "text": "Beautiful writing Marred by One-Note Characterizations: How could Kingsolver have ruined her book with such an obvious error? Nathan is a strident paper doll that flattens the whole story. Just as bad, the author has all the narrators using the same ironic tone to decribe him, deadening their voices as well. At the same time, Kingsolver doesn't have the guts to show him doing something trully terrible. I don't trust an author who can't let the reader make up his own mind, and as a consequence I couldn't trust her views about ANYTHING in the story. I'm giving this two stars for her descriptions of the African landscape, and that is all.",
229
- "id": "44"
230
- },
231
- "45": {
232
- "label": false,
233
- "text": "Much worse than any cordless phone I've ever had: This phone cuts out only 2 rooms away from the base station. There is static noise, and callers on the other end complain about sound quality. I can't go into the garden, which used to be no problem with my old 900 MHz phone.",
234
- "id": "45"
235
- },
236
- "46": {
237
- "label": false,
238
- "text": "Waste of time & money: The first Hangover was not too bad, this one was just terrible. The acting is bad, the script is bad, everything about this movie was just bad. Do yourself a favor, don't buy this movie as it is a total waste of time and money.",
239
- "id": "46"
240
- },
241
- "47": {
242
- "label": false,
243
- "text": "Did Not Work For Me!: Impressive You Tube Video (Like a Sci-Fi Fantasy). In reality it's a high speed Easy Out so unsurprisingly it broke faster than an Easy out. This product did not work for me. The drill part did not drlil, the puller part did not pull. It was a total zero.",
244
- "id": "47"
245
- },
246
- "48": {
247
- "label": true,
248
- "text": "Excellent book, long overdue.: From a very long time women were told that looking good was of utmost importance. This was without regard to health or fitness and how age affected these parameters. Witness the whalebone and other types of corsets, the spike heeled shoes and the numerous weight loss programmes on the market (some of which are downright dangerous). Now there is a book, backed by solid research, that allows women of all ages to remain fit and healthy for a lifetime. I am certainly going to recommend this book to all the women I know.Bentley Norville",
249
- "id": "48"
250
- },
251
- "49": {
252
- "label": false,
253
- "text": "not an all star: Not a practical guide in this collecting age. Does NOT have a comprehensive list; meaning it does NOT cover all manufacturers and, more importantly, for the ones it does, only provides listings of the base set. That means no insert or variation pricing whatsoever. Also, no oddball or minor league issues are listed. Generally speaking, unless you are collecting base sets prior to the advent of inserts and alternate versions of the base set, this guide is fairly useless.",
254
- "id": "49"
255
- },
256
- "50": {
257
- "label": false,
258
- "text": "Again, second rate city, third rate writer: Just another example of Mr. Lindberg's pitiful attempt at exhibiting a strong expertise on a subject with which he is clearly obsessed. Don't waste your time with this book, either. It is poorly written and fails to engage the reader. You might consider using this book and the first book he wrote on the same subject, as a pair of bookends. That is about all they are worth.",
259
- "id": "50"
260
- },
261
- "51": {
262
- "label": true,
263
- "text": "Reality: No one should need to convince you to buy this book, you should just do it! It's so well written and worded and brings you right to the heart of a sexual reality that most people like to pretend doesn't really live and breath in their fair cities. I never again want to hear someone bad mouth a working girl for what she does. I will and do now however look at men with a curious eye wondering if they are depraved peep show window lickers :)",
264
- "id": "51"
265
- },
266
- "52": {
267
- "label": false,
268
- "text": "Bummer: Visual effects and Battle footage were great...the other 85% of the movie was just lousy fluff...",
269
- "id": "52"
270
- },
271
- "53": {
272
- "label": true,
273
- "text": "The spark of idependence: Filled with the independent spark that made us all love life at one point or another. A fun, introspective and nonsensical movie that sticks with you.",
274
- "id": "53"
275
- },
276
- "54": {
277
- "label": true,
278
- "text": "What I expected from Mirman's website. Funny. Funny. Russian.: lol, gotta love Eugene. Even when his audience doesn't initially laugh, he gets in a good zinger at himself and they laugh at that. He's witty without being condescending, and uncomplicated without seeing contrived. However, if you're not a fan of irreverant humor, this may not be for you.",
279
- "id": "54"
280
- },
281
- "55": {
282
- "label": false,
283
- "text": "Do not...repeat...do not bother!: It is not often that I offer a negative review but this compilation while attractive does not deliver at all.The foot massage gizmo is awkward and uncomfortable.The pumice stone leaves rough splinter like skin.The foot scrub doesn't reall scrub.The rotary action tool has five heads, none of which work well and you must hold the switch in place or it turns off. It is cumbersome and ineffective.The one star was initially given for a foot brush (which later lost its bristles very easily as I update the review) and a sweet smelling foot repair balm.Don't waist your money. Soak your feet and invest in an inexpensive German Titania file, smooth and coarser side, or a like product. It will last for years.",
284
- "id": "55"
285
- },
286
- "56": {
287
- "label": false,
288
- "text": "Not Sandra's Best: Ms. Brown has written better romance novels. Don't give up on her if this was your first Sandra book.The feeble female lead struggles with a 15-year crush that walks back into her life. The smug male lead acts like a jerk through most of the novel. The romance scenes grapple to muster up passion but fall short. Both of the main characters bothered me; my favorite character was the 17-year old.A quick read...about 4 hours (with interruptions) for me...but probably not worth it.",
289
- "id": "56"
290
- },
291
- "57": {
292
- "label": true,
293
- "text": "Impressed: Lots-O-Fun. Wood and glass toys are high quality and are a good fall back for the kids to play with they are \"bored\". Would buy again.",
294
- "id": "57"
295
- },
296
- "58": {
297
- "label": false,
298
- "text": "Light turned on by itself 3 times: The installation was easy. I used it for a week, everything worked fine, EXCEPT the light it connected to turned on by itself 3 times so far, with no one near to either one of the switch. Not sure whether it is a defective unit, or this product is too sensitive to noise. I'm returning this product and will just install a regular switch instead.",
299
- "id": "58"
300
- },
301
- "59": {
302
- "label": true,
303
- "text": "good battery: I feel kind of silly writing a review for a battery, but have to say that these last a LONG time. Work very well.",
304
- "id": "59"
305
- },
306
- "60": {
307
- "label": true,
308
- "text": "Even a Woman finds it funny: Yes, even a woman finds \"Married to Mommy\" funny. The book gets you laughing aloud when it is trying to make fun of \"Mommies\". The truth is that it really is making fun of the stupidity of men and their simple basic needs of sex, getting out of work, and beer. Of course, the truth is always funny.A definite MUST for any woman, married or not. We will now know all the secret tricks the men try to use on us.By the way, I am NOT a MOMMY!",
309
- "id": "60"
310
- },
311
- "61": {
312
- "label": true,
313
- "text": "Gungrave...not quite what you might expect: Those thinking this is another version of Trigun will be disappointed. Gungrave is actually a lot deeper and more complex. The lead is short on dialouge, but the story has more depth and character development than most anime. The first DVD is more about the main character's past than about the reanimated killing machine he's become, but it definitely leaves you wanting more.",
314
- "id": "61"
315
- },
316
- "62": {
317
- "label": true,
318
- "text": "Error in product description: It's great in every way. However, if you'd prefer a digital tuner (as I do), then you might need to look further. The product description boasts a digital AM/FM tuner, but it's disappointingly an analog AM/FM tuner.Overall - especially for the price - I think it's pretty good.",
319
- "id": "62"
320
- },
321
- "63": {
322
- "label": true,
323
- "text": "good phone but not as user friendly as it could be: Battery life is very good. Phone has good range. My only complaint is it's to involved to get your message from the handset.",
324
- "id": "63"
325
- },
326
- "64": {
327
- "label": false,
328
- "text": "Big waste of money (and space in my house!): My 5 year old son wanted this so bad, but when we got it for him, there were so many pieces to put together that didn't fit together well, he never played with it. It just sits on our floor in many pieces taking up toy space! What a waste!",
329
- "id": "64"
330
- },
331
- "65": {
332
- "label": true,
333
- "text": "Don't want to take it off: Very satisfied with an earlier purchase of this Bali bra model, I was just as pleased with the new one. Very comfortable, well made and a good neutral color. It will be my next choice, too.",
334
- "id": "65"
335
- },
336
- "66": {
337
- "label": true,
338
- "text": "Fantastico: If anybody who's into rock music is ever looking for a band to keep you on your toes, this is the band. I've been a fan for 10 years now, and no album has ever sounded like any of their previous albums. This disc is fantastic with such a variety of styles, as are the previous releases, even back to the Rainbow Butt Monkey days.",
339
- "id": "66"
340
- },
341
- "67": {
342
- "label": false,
343
- "text": "too much visual: There are far too much designs, visuals, colors, etc in the book - this is highly distracting, as TV screen can be...By way of example (among so many...), what is the use of colors with the three squares of the Pyth. theorem???? this is as useless as writting 2+3=5 with 2 in blue, 3 in red and 5 in yellow...I wish I had purchased the 2nd edition, which according to reviews was closer to what I was looking for.",
344
- "id": "67"
345
- },
346
- "68": {
347
- "label": true,
348
- "text": "Aretha's First Arista Release Showed Pleasures to Come: After a long and musically satisfying career with Atlantic, Aretha severed her ties with that company and moved under the wing of Arista's Clive Davis. With the start of the 1980's, Aretha was looking for new territory to conquer and almost succeeded with this mixed bag.\"United Together\" is a fine tune that benefits from beautiful orchestral arrangement that is matched by Aretha's superb vocal instrument. The remake of \"Can't Turn You Loose\" allows Aretha to show why she is the Queen of Soul\" for she really belts this one out. Another cover, that of the Doobies' \"What a Fool Believes,\" is an interesting interpretation. The final cut \"School Days\" appears to be \"autobiographical\" for every girl growing up in the fifties.Although not as strong as her Atlantic work, \"Aretha\" is still a suitable addition to the artist's discography.",
349
- "id": "68"
350
- },
351
- "69": {
352
- "label": false,
353
- "text": "Misguided Purchase: The photo and description do not reflect the product. The screen panel kit I received was white. What a huge inconvenience during a time-crunch.",
354
- "id": "69"
355
- },
356
- "70": {
357
- "label": false,
358
- "text": "Banacek: My husband and were looking forward to seeing this series.The first show was SO boring, we finally just quit watching it.Actually, we haven't gotten around to watching anymore. I guess we were afraid of a repeat.Maybe that was just once, I hope!",
359
- "id": "70"
360
- },
361
- "71": {
362
- "label": true,
363
- "text": "JDT: Uncle Tupelo is without doubt one of the most under appreciated groups of the 90's. Anodyne, like each of the three albums that came before it, has everything that a remarkable recording requires: great songs, honest lyrics, and artists who really care about the music they are making. Like the best of Dylan and Springsteen, the songs are about real people with real troubles and joys. When you hear them you know they are coming from the heart. The songs contributed by Jay Farrar and Jeff Tweedy are easily differentiated by the voacls, music, and lyrics. What makes this record interesting is how well these unique sounds compliment each other. The union is seamless.",
364
- "id": "71"
365
- },
366
- "72": {
367
- "label": true,
368
- "text": "Well Worth Reading: First a confession: Miriam Wasserman was my mother. However, she published several books, but this is the only one I really found useful. She walks the reader through the New York City school system and the attitudes of different groups involved in the system back in the 1960s. This includes parents, teachers and administrators. Her view is that the further away one got from parents and students, the more prestige one had. She meticulously describes the teachers' strike of 1968 against \"community control of schools\", a strike of which she is extremely critical. She explores the racism that was involved in this strike, including using quotes from striking teachers, etc. It should be emphasized that the author was pro-union all her life, so her views don't stem from an anti-union bias. The book also covers the high school student rebellion which coincided with and followed the strike.",
369
- "id": "72"
370
- },
371
- "73": {
372
- "label": true,
373
- "text": "compact and loaded: I bought this phone after reading the cnet reviews and really liked it. It looks small and really compact. I like the camera pics at 2 mega pixel and bright flash. The mp3 player is crisp. The headset that comes along delvers amazing fM radio. I think my phone is not very loud and you have a problem when you are around a noisy crowd. I just bought this phone again for my cousin. He likes it too. Almost forgot the display is very good.",
374
- "id": "73"
375
- },
376
- "74": {
377
- "label": true,
378
- "text": "Outstanding text!: Brooks/Cole should keep this text in their catalog for ages! It is well-written, examples are generally quite clear, vocabulary is introduced well, and the exercises develop real skills, rather than simply be busy-work. One of the best calculus books ever!",
379
- "id": "74"
380
- },
381
- "75": {
382
- "label": true,
383
- "text": "Excel 2003 Bible: Very good source of information. I will most likely buy other books in this series.",
384
- "id": "75"
385
- },
386
- "76": {
387
- "label": true,
388
- "text": "Tasting is Believing: Gluten-free breads used to have a gritty texture from the rice flour, and were too soft for sandwiches. Bette Hagman uses garbanzo/fava bean flour, sorghum flour, tapioca flour, and corn starch to create breads which have a similar texture to wheat flour breads, and the flavors of her breads are fabulous.My BF bought me this book and a great tasting beverage to drink it with. Since he knows I quit coffee recently, he's been really wonderful helping me in cope with my mood swings. S o y f e e is made from soy beans that is roasted just like coffee. I enjoy the taste and don't miss coffee one bit. Buy it online at www.s o y c o f fee.com.This is a 'must have' for anyone baking gluten-free. I think all of Bette Hagman's books are wonderful and a must for those with gluten intolerance.",
389
- "id": "76"
390
- },
391
- "77": {
392
- "label": true,
393
- "text": "5 stars for the show, no stars for the \"Collector's Edition\": I was really looking forward to getting this Collector's Edition and see what extras were added. I knew it wasn't a lot - just a mini-book and a documentary - but I figured it would be packaged in a cool way.Wrong.As others have already mentioned, the Collector's Edition is *literally* theAvatar: The Last Airbender - The Complete Book 1 Collectionslipped into another cardboard box, with a little booklet and DVD in an envelope (not even a case!) wedged in. It's really disappointing; it would have been so easy to create a quality Collector's Edition but the studio couldn't be bothered, I guess.",
394
- "id": "77"
395
- },
396
- "78": {
397
- "label": true,
398
- "text": "sula scottcampos: Sula, a book that talks about the issues of being a black women is a really good novel to read.One of the reasons I recommend it is because of its realism and its themes - death, sex, friendship and poverty.I also think that its characters are very good, its easy to identify with one or both of them. I really recommend this book to anyone who enjoys good literature.",
399
- "id": "78"
400
- },
401
- "79": {
402
- "label": true,
403
- "text": "Fantastic! It's a must-have for girls!: I hated razor, tried shaving but it did not work for me. Shaving made the hair grows thicker and faster afterwards, plus the roots are impossible to be getting rid of. After reading the reviews, I ordered it to try, I used it for once and already fall in love with this. I used to use small tweezer to pluck out my leg's hair, in order to avoid the razor, it took me a few hours to do that but this super electronic tweezer works wonder! You won't see the black roots and I have smooth and silkly legs in 20 mins. It does not hurt at all, if you use it on your legs. But, if you use it at your under arm, it won't be a pleasant feeling, of course! I will never use anything else besides this for hair removing anymore! highly recommended!",
404
- "id": "79"
405
- },
406
- "80": {
407
- "label": false,
408
- "text": "This is not a toy: I guess I was expecting more out of these leave window decals. I just didn't find them attractive after placing them on my window, they seem very cheap, I guess because they are cheap.I threw them away.",
409
- "id": "80"
410
- },
411
- "81": {
412
- "label": true,
413
- "text": "Wonderful book for anyone running a professional hatchery: This book is aimed more for hatcheries that are raising Trout, Salmon, Catfish and other food fishes. However, there is so much information in this book that even ornamental fish hatcheries will find an incredible amount of useful information. The chapters on Fish Nutrition are especially helpful.",
414
- "id": "81"
415
- },
416
- "82": {
417
- "label": true,
418
- "text": "Amazing book!!: Once again, Eric Victorino's artistic talent is put into this great free-verse poetry book. I couldn't put it down and I finished it the day I received it in the mail. All of the poems are awesome but the one I found the most interesting was \"It's A People Business.\" All of the experiences in his life, personally and with his band, come to life in this book. Please check it out! It's worth every penny!!",
419
- "id": "82"
420
- },
421
- "83": {
422
- "label": true,
423
- "text": "The white trumpet contender respect Miles Davis!: The story of the Jazz in the Fifties certainly would be remain unfinished without the ominous presence of this outstanding virtuoso. Baker sound still possesses this alluring hook, this magnetic engagement charm, eloquent expressiveness, enrapturing lyricism and contagious rhythm, despite the elapsed time, which confirms by itself the status of his musicianship.This selection is jus a little sample of the broad universe of his genius. A well thought selection of great musical successes, available, preserved and immortalized by the Digital Technology for our future enjoyment.Absolutely indispensable in your treasured collection.",
424
- "id": "83"
425
- },
426
- "84": {
427
- "label": false,
428
- "text": "What the?: I'm sorry, maybe it's just me but I can't helping stating that this has to be one of the wrost movies I've seen in my life!Can you say boring? Can you say doesn't make sense at all? The first 30 minutes of the movie were O.K. But it went downhill after that. This movie is a prime example of a director attempting to make a deep movie with a meaningful lesson but failed on all levels. I don't recommend this movie unless you want to go to sleep or you don't have anything else to do.",
429
- "id": "84"
430
- },
431
- "85": {
432
- "label": true,
433
- "text": "very very good!!!!: linda blair is a young girl who is possessed. and her mother doesn't know what to do until one day when she hears her daughter screaming and stabbind herself she knows what to do GET AN EXORCIZIM!!!",
434
- "id": "85"
435
- },
436
- "86": {
437
- "label": true,
438
- "text": "Awesome product for the price!: This range extender works as advertised! I am very happy with the purchase. I was a little worried after reading some of the horror stories here, but I have to say, Chovy's review instructions (on this site) were just this ticket to get the repeater up and running in less than 30 minutes. It was unbelievably easy to install! Do not be frightened by negative reviews. If you can set up a wireless network, you can set up this repeater. However, I did upgrade the firmware before I did anything else and maybe that helped. I got the firmware update from the Belkin site.",
439
- "id": "86"
440
- },
441
- "87": {
442
- "label": false,
443
- "text": "Slight: This book is either a heavily illustrated short story collection or a text-heavy comic. Its unusual format is its most original feature. Its plots are negligible, but its illustrations and text evoke a unique atmosphere of self-conscious nonconformism. Although its target audience is dare-to-be-different teens and college students, its interesting turns of phrase and expressive line drawings are not devoid of interest for general audences.",
444
- "id": "87"
445
- },
446
- "88": {
447
- "label": true,
448
- "text": "ANgeleyes: Seem to dry up their eyes fairly well, although I haven't seen the color (brown stain) change much yet.",
449
- "id": "88"
450
- },
451
- "89": {
452
- "label": false,
453
- "text": "Nice Try: Salt Lake 2002 is not a bad game, but it isn't good either. The graphics are excellent, but some of the events are bad. Bobsleigh, and skiing aren't bad but the others are. You dont stay into it for long. I liked it for a while, but it gets boring.",
454
- "id": "89"
455
- },
456
- "90": {
457
- "label": false,
458
- "text": "Cutler's share of the pie: This book was a major disappointment. I am familiar with books written solely by the Dalai Lama, such as the \"Library of Tibet\" series, which are much more engrossing and have much more substance than Cutler's book. Cutler attempts (successfully, sadly) to have his share of the profitable market that involves the Dalai Lama's writings. The book is insipid, does not try to explain any important issue in the light of Buddhist philosophy, and only rehashes issues that several other westerners already wrote about. It's another big ego trip: we keep hearing time and again about his opportunities to be with the Dalai Lama. What a shame, Cutler. I sold the book as soon as I finished it.",
459
- "id": "90"
460
- },
461
- "91": {
462
- "label": false,
463
- "text": "Mostly tedious, with interesting parts: I found the writing interesting, and the subject fascinating, but I found myself frustrated by the author's difficulty in talking directly about the status of Muslim women with her interview subjects. The author spent many pages writing about the menus and dress of the many middle and upper-middle class women she interviewed. It seemed as though her interview subjects resisted her efforts to discuss the status of women in their countries, so we too as readers had to wade through much distracting material and misunderstandings about feminism and gender. Great travel stories, but not a great source of information about Muslim women.",
464
- "id": "91"
465
- },
466
- "92": {
467
- "label": false,
468
- "text": "Sesame Street Toddler: I did not find this game to be as educationally sound as I would expect from Sesame street. There is too much talking before the program will react to a command. The graphics are jerky and the cursor acts like the target is magnetically charged and keeps pushing away the cursor. When the child actually does manage to click on a target, the cursor may still fly to another target and the child is told that his answer is wrong. Another example of educational problems is the pronunciation of \"eggs\" using a long \"a\" sound instead of a short \"e.\" This is not very helpful in teaching a child the sound for short \"e.\" Children that are used to playing computer games by themselves may find that this game is too frustrating to do alone. The open ended learning curve is a great idea. I just wish Sesame Street would hire a truly qualified literacy expert to help clean up the many problems in this program.",
469
- "id": "92"
470
- },
471
- "93": {
472
- "label": false,
473
- "text": "needs a buzz cut and a point: I avoided reading this book, not because of the hermaphrodite subject matter, but because I have never read a multigenerational family saga that I liked. Many books let me down in the middle, and this was no exception. The beginning of the book was incredible and harrowing, with momentum and characterization. The post-America nextgens part of the saga was so boring I found myself flipping and flipping - always a bad sign. If there was some kind of larger point to all of that, then I must have missed it. Yes there's the identity duality and trinity themes playing out here: man/woman, greek/turkish/american modern/old world sick/healthy innocent/guilty original/reinvented. But it was almost as if the author was saying - here it is again - get it? I like my fiction much more subtle than this.",
474
- "id": "93"
475
- },
476
- "94": {
477
- "label": false,
478
- "text": "OMG! DO NOT BUY!: I normally don't take the time to submit a review.In this case however, I feel obligated to do so.This is by far one of the worst purchases I have ever made.Here's why.....The contraption is far too bulky.The case's enclosing is unbearable, takes a good minute or so to open it.The texture of the material feels like a cheap toy.The overall design is horrible, something I could make in my basement.For the love of everything sacred, do not buy this thing.",
479
- "id": "94"
480
- },
481
- "95": {
482
- "label": true,
483
- "text": "Good price, good quality: Comparable HDMI cables can be bought for 45 or more. Even though the price is cheap the quality is good, no problems so far.",
484
- "id": "95"
485
- },
486
- "96": {
487
- "label": true,
488
- "text": "Good rock music: This is what i call rock music good beat and good lyrics, don't listen to the other reviews. This cd is one of the best, listen to a few songs and you will get hooked. I recommend this cd its awesome.",
489
- "id": "96"
490
- },
491
- "97": {
492
- "label": false,
493
- "text": "BORING!: This movie is soo boring. How in the hell did this movie make so much at the box office. Do people really want to pay for crappy movies like this. bottom line this is a chick flick nothing is good. And now they are re-releasing this movie with more boring stuff. This is the worst movie ever.",
494
- "id": "97"
495
- },
496
- "98": {
497
- "label": false,
498
- "text": "Already Rusting: Inferior quality. The plating is thin and rust is coming through the finish. Inexcusable for a product that is designed for use in a humid environment.",
499
- "id": "98"
500
- },
501
- "99": {
502
- "label": false,
503
- "text": "confusing internet setup: i wanted a camera that could email photos but this camera will not go out through the router and the manual setup , to punch a hole thru router is confusing.",
504
- "id": "99"
505
- },
506
- "55066581ad334ef5844c6f7707525010": {
507
- "label": true,
508
- "text": "Thought this was super cool, and a really important step in all the physical books' preservation.",
509
- "id": "55066581ad334ef5844c6f7707525010"
510
- },
511
- "fef14d13366f482d9f4e0726b357f178": {
512
- "label": true,
513
- "text": "There are some amazing hikes around Mt. Fuji.",
514
- "id": "fef14d13366f482d9f4e0726b357f178"
515
- },
516
- "70aed7369aa74031a06f5f3155476d7c": {
517
- "label": true,
518
- "text": "Thought this was super cool, and a really important step in preserving all the physical books.",
519
- "id": "70aed7369aa74031a06f5f3155476d7c"
520
- },
521
- "ac65d14b710648b8bf3c2a53caf6ac91": {
522
- "label": false,
523
- "text": "The profits of the business that was most successful were still negative.",
524
- "id": "ac65d14b710648b8bf3c2a53caf6ac91"
525
- },
526
- "ce00e6b1547444259a13c55654e66500": {
527
- "label": true,
528
- "text": "love them best, they reconnect in hysterically funny and emotionally significant ways.",
529
- "id": "ce00e6b1547444259a13c55654e66500"
530
- },
531
- "8943a94d205b43ceb4420d5ab9c5611a": {
532
- "label": true,
533
- "text": "Walt Disney's timeless masterpiece is an extravaganza of sight and sound! See the music come to life, hear the pictures burst into song and experience the excitement that is Fantasia over and over again.",
534
- "id": "8943a94d205b43ceb4420d5ab9c5611a"
535
- },
536
- "6af8fc3dd30d4f8caf5a2929fc88534b": {
537
- "label": false,
538
- "text": "A director struggles with a difficult sex scene between a young actor and actress who can't stand one another. Aided by her loyal assistant, she is hell-bent on getting the scene right without compromise.",
539
- "id": "6af8fc3dd30d4f8caf5a2929fc88534b"
540
- },
541
- "dbe571ed810d40f48170147dcab1c90f": {
542
- "label": false,
543
- "text": "sound created by drawing directly on the soundtrack).",
544
- "id": "dbe571ed810d40f48170147dcab1c90f"
545
- },
546
- "682102dfc5494f03926d16ae947a6250": {
547
- "label": true,
548
- "text": "one of glowing admiration! Written by Mark Toscano",
549
- "id": "682102dfc5494f03926d16ae947a6250"
550
- },
551
- "9b044458bb0e4bd68359e62d5fb4b979": {
552
- "label": false,
553
- "text": "Seth McArdle (Samuel Davis) is a high school senior with an especially full plate. Not only must he navigate the usual social and academic pitfalls of high school, but he has to contend with his young twin sisters, serving as de facto parent in the absence of his deceased mother and deadbeat father. The pressure mounts when the bank calls with a foreclosure warning, and Seth's frustrations spill",
554
- "id": "9b044458bb0e4bd68359e62d5fb4b979"
555
- },
556
- "abf2d24c7d8845769b7368be28f2c25d": {
557
- "label": true,
558
- "text": "Bjork is a beautiful creature and her music is stellar to anything I've ever heard. This DVD is essential for all Bjork fans, because you find something new every time you watch it.",
559
- "id": "abf2d24c7d8845769b7368be28f2c25d"
560
- }
561
- },
562
- "version": 11,
563
- "description": "Positive sentiment"
564
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/concept/lilac/positive-sentiment/sbert.pkl DELETED
Binary file (94.4 kB)
 
data/concept/lilac/profanity/concept.json DELETED
The diff for this file is too large to render. See raw diff
 
data/concept/lilac/profanity/openai.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a97028bbb8a75913874b83c768c5cdc8ad9ec00aba1ce4296429dd7326165ad7
3
- size 3247822
 
 
 
 
data/concept/lilac/profanity/sbert.pkl DELETED
Binary file (844 kB)
 
data/concept/lilac/toxicity/concept.json DELETED
The diff for this file is too large to render. See raw diff
 
data/concept/lilac/toxicity/sbert.pkl DELETED
Binary file (958 kB)
 
data/datasets/local/spotify/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:32224657332b09187a737c73ab634f9d14c9ba9a240bd105f1b9819cde2afcef
3
- size 37128682
 
 
 
 
data/datasets/local/spotify/manifest.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "data_schema": {
6
- "fields": {
7
- "artist": {
8
- "dtype": "string"
9
- },
10
- "song": {
11
- "dtype": "string"
12
- },
13
- "link": {
14
- "dtype": "string"
15
- },
16
- "text": {
17
- "dtype": "string"
18
- },
19
- "__line_number__": {
20
- "dtype": "int64"
21
- },
22
- "__rowid__": {
23
- "dtype": "string"
24
- }
25
- }
26
- }
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/local/spotify/settings.json DELETED
@@ -1 +0,0 @@
1
- {"ui": {"media_paths": [["text"]]}}
 
 
data/datasets/local/spotify/text/.concepts/local/aliens/sbert-neg-100.pkl DELETED
Binary file (169 kB)
 
data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f1555427c8dc3b2f1e9310f5e71b46297e607f710365e107c73c894d5a8e1b0
3
- size 2033407
 
 
 
 
data/datasets/local/spotify/text/lang_detection/signal_manifest.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "lang_detection(text)",
6
- "data_schema": {
7
- "fields": {
8
- "__rowid__": {
9
- "dtype": "string"
10
- },
11
- "text": {
12
- "fields": {
13
- "lang_detection": {
14
- "repeated_field": {
15
- "fields": {
16
- "lang_code": {
17
- "dtype": "string"
18
- }
19
- },
20
- "dtype": "string_span"
21
- },
22
- "signal": {
23
- "signal_name": "lang_detection"
24
- }
25
- }
26
- }
27
- }
28
- }
29
- },
30
- "signal": {
31
- "signal_name": "lang_detection"
32
- },
33
- "enriched_path": [
34
- "text"
35
- ]
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9796beb630cc3503f3c2ac9db8f71e4c1604570836d78bbf364e801cd427c39e
3
- size 2709987
 
 
 
 
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1ba0fe68cc02849b0a20b7f72047c8e9cb8e5ef5b57b0cd642fa0b0be8a6e06
3
- size 3340135
 
 
 
 
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/signal_manifest.json DELETED
@@ -1,64 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "local/outerspace/v34(text.sbert.*.embedding)",
6
- "data_schema": {
7
- "fields": {
8
- "__rowid__": {
9
- "dtype": "string"
10
- },
11
- "text": {
12
- "fields": {
13
- "sbert": {
14
- "repeated_field": {
15
- "fields": {
16
- "embedding": {
17
- "fields": {
18
- "local/outerspace/v34": {
19
- "dtype": "float32",
20
- "signal": {
21
- "signal_name": "concept_score",
22
- "embedding": "sbert",
23
- "namespace": "local",
24
- "concept_name": "outerspace",
25
- "draft": "main",
26
- "num_negative_examples": 100
27
- },
28
- "bins": [
29
- [
30
- "Not in concept",
31
- null,
32
- 0.5
33
- ],
34
- [
35
- "In concept",
36
- 0.5,
37
- null
38
- ]
39
- ]
40
- }
41
- }
42
- }
43
- }
44
- }
45
- }
46
- }
47
- }
48
- }
49
- },
50
- "signal": {
51
- "signal_name": "concept_score",
52
- "embedding": "sbert",
53
- "namespace": "local",
54
- "concept_name": "outerspace",
55
- "draft": "main",
56
- "num_negative_examples": 100
57
- },
58
- "enriched_path": [
59
- "text",
60
- "sbert",
61
- "*",
62
- "embedding"
63
- ]
64
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5df43291782b8c731d4ce56537946654c642a01dc9a4e37de394836362f6b45
3
- size 3727400
 
 
 
 
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:94e10c23d7229541e1f60b791a659d13673b10a03649abf0ae092e0e18c5aee3
3
- size 170446976
 
 
 
 
data/datasets/local/spotify/text/sbert/signal_manifest.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "sbert(text)",
6
- "data_schema": {
7
- "fields": {
8
- "__rowid__": {
9
- "dtype": "string"
10
- },
11
- "text": {
12
- "fields": {
13
- "sbert": {
14
- "repeated_field": {
15
- "fields": {
16
- "embedding": {
17
- "dtype": "embedding"
18
- }
19
- },
20
- "dtype": "string_span"
21
- },
22
- "signal": {
23
- "signal_name": "sbert"
24
- }
25
- }
26
- }
27
- }
28
- }
29
- },
30
- "signal": {
31
- "signal_name": "sbert"
32
- },
33
- "enriched_path": [
34
- "text"
35
- ],
36
- "embedding_filename_prefix": "embeddings-00000-of-00001"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lilac/concepts/concept.py CHANGED
@@ -322,7 +322,8 @@ class ConceptModel:
322
  def _calibrate_on_dataset(self, column_info: ConceptColumnInfo) -> None:
323
  """Calibrate the model on the embeddings in the provided vector store."""
324
  db = get_dataset(column_info.namespace, column_info.name)
325
- vector_store = db.get_vector_store(self.embedding_name, normalize_path(column_info.path))
 
326
  keys = vector_store.keys()
327
  num_samples = min(column_info.num_negative_examples, len(keys))
328
  sample_keys = random.sample(keys, num_samples)
@@ -354,12 +355,10 @@ class ConceptModel:
354
 
355
  item_result: list[Item] = []
356
  for embedding_item, score in zip(item, scores):
357
- item_result.append(
358
- lilac_span(
359
- start=embedding_item[VALUE_KEY][TEXT_SPAN_START_FEATURE],
360
- end=embedding_item[VALUE_KEY][TEXT_SPAN_END_FEATURE],
361
- metadata={f'{self.namespace}/{self.concept_name}': score}))
362
- result_items.append({self.embedding_name: item_result})
363
  return result_items
364
 
365
  def coef(self, draft: DraftId) -> np.ndarray:
 
322
  def _calibrate_on_dataset(self, column_info: ConceptColumnInfo) -> None:
323
  """Calibrate the model on the embeddings in the provided vector store."""
324
  db = get_dataset(column_info.namespace, column_info.name)
325
+ vector_index = db.get_vector_db_index(self.embedding_name, normalize_path(column_info.path))
326
+ vector_store = vector_index.get_vector_store()
327
  keys = vector_store.keys()
328
  num_samples = min(column_info.num_negative_examples, len(keys))
329
  sample_keys = random.sample(keys, num_samples)
 
355
 
356
  item_result: list[Item] = []
357
  for embedding_item, score in zip(item, scores):
358
+ span = embedding_item[VALUE_KEY]
359
+ start, end = span[TEXT_SPAN_START_FEATURE], span[TEXT_SPAN_END_FEATURE]
360
+ item_result.append(lilac_span(start, end, {'score': score}))
361
+ result_items.append(item_result)
 
 
362
  return result_items
363
 
364
  def coef(self, draft: DraftId) -> np.ndarray:
lilac/config.py CHANGED
@@ -2,7 +2,7 @@
2
  import os
3
  from typing import Any, Literal, Optional, Union, cast
4
 
5
- from dotenv import dotenv_values
6
 
7
  EnvironmentKeys = Union[Literal['LILAC_DATA_PATH'],
8
  # Authentication on the demo.
@@ -20,41 +20,44 @@ EnvironmentKeys = Union[Literal['LILAC_DATA_PATH'],
20
  Literal['DUCKDB_USE_VIEWS'],
21
  # Debugging
22
  Literal['DEBUG'], Literal['DISABLE_LOGS']]
23
- _ENV: Optional[dict[str, Optional[str]]] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  def env(key: EnvironmentKeys, default: Optional[Any] = None) -> Any:
27
  """Return the value of an environment variable."""
28
- global _ENV
29
- first_load = False
30
- # This is done lazily so we can prevent loading local environment variables when testing. The
31
- # 'PYTEST_CURRENT_TEST' environment variable is only set after module initialization by pytest.
32
-
33
- if _ENV is None:
34
- in_test = os.environ.get('LILAC_TEST', None)
35
- _ENV = {
36
- **dotenv_values('.env'), # load shared variables
37
- **dotenv_values('.env.demo'), # load demo-specific environment flags.
38
- **(dotenv_values('.env.local') if not in_test else {})
39
- }
40
- first_load = True
41
-
42
- # Override the file based configs with the current environment, in case flags have changed.
43
- environment = {**_ENV, **os.environ}
44
-
45
- if first_load:
46
- if environment.get('LILAC_AUTH_ENABLED', None):
47
- if not environment.get('GOOGLE_CLIENT_ID', None) or not environment.get(
48
- 'GOOGLE_CLIENT_SECRET', None):
49
- raise ValueError(
50
- 'Missing `GOOGLE_CLIENT_ID` or `GOOGLE_CLIENT_SECRET` when `LILAC_AUTH_ENABLED=true`')
51
- SECRET_KEY = environment.get('LILAC_OAUTH_SECRET_KEY', None)
52
- if not SECRET_KEY:
53
- raise ValueError('Missing `LILAC_OAUTH_SECRET_KEY` when `LILAC_AUTH_ENABLED=true`')
54
-
55
- return environment.get(key, default)
56
 
57
 
58
  def data_path() -> str:
59
  """Return the base path for data."""
60
  return cast(str, env('LILAC_DATA_PATH', './data'))
 
 
 
 
 
2
  import os
3
  from typing import Any, Literal, Optional, Union, cast
4
 
5
+ from dotenv import load_dotenv
6
 
7
  EnvironmentKeys = Union[Literal['LILAC_DATA_PATH'],
8
  # Authentication on the demo.
 
20
  Literal['DUCKDB_USE_VIEWS'],
21
  # Debugging
22
  Literal['DEBUG'], Literal['DISABLE_LOGS']]
23
+
24
+
25
+ def _init_env() -> None:
26
+ in_test = os.environ.get('LILAC_TEST', None)
27
+ # Load the .env files into the environment in order of highest to lowest priority.
28
+
29
+ if not in_test: # Skip local environment variables when testing.
30
+ load_dotenv('.env.local')
31
+ load_dotenv('.env.demo')
32
+ load_dotenv('.env')
33
+
34
+ if os.environ.get('LILAC_AUTH_ENABLED', None):
35
+ if not os.environ.get('GOOGLE_CLIENT_ID', None) or not os.environ.get(
36
+ 'GOOGLE_CLIENT_SECRET', None):
37
+ raise ValueError(
38
+ 'Missing `GOOGLE_CLIENT_ID` or `GOOGLE_CLIENT_SECRET` when `LILAC_AUTH_ENABLED=true`')
39
+ SECRET_KEY = os.environ.get('LILAC_OAUTH_SECRET_KEY', None)
40
+ if not SECRET_KEY:
41
+ raise ValueError('Missing `LILAC_OAUTH_SECRET_KEY` when `LILAC_AUTH_ENABLED=true`')
42
+ if os.environ.get('LILAC_AUTH_ENABLED', None):
43
+ if not os.environ.get('GOOGLE_CLIENT_ID', None) or not os.environ.get(
44
+ 'GOOGLE_CLIENT_SECRET', None):
45
+ raise ValueError(
46
+ 'Missing `GOOGLE_CLIENT_ID` or `GOOGLE_CLIENT_SECRET` when `LILAC_AUTH_ENABLED=true`')
47
+ SECRET_KEY = os.environ.get('LILAC_OAUTH_SECRET_KEY', None)
48
+ if not SECRET_KEY:
49
+ raise ValueError('Missing `LILAC_OAUTH_SECRET_KEY` when `LILAC_AUTH_ENABLED=true`')
50
 
51
 
52
  def env(key: EnvironmentKeys, default: Optional[Any] = None) -> Any:
53
  """Return the value of an environment variable."""
54
+ return os.environ.get(key, default)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  def data_path() -> str:
58
  """Return the base path for data."""
59
  return cast(str, env('LILAC_DATA_PATH', './data'))
60
+
61
+
62
+ # Initialize the environment at import time.
63
+ _init_env()
lilac/data/dataset.py CHANGED
@@ -12,7 +12,7 @@ from pydantic import Field as PydanticField
12
  from pydantic import StrictBool, StrictBytes, StrictFloat, StrictInt, StrictStr, validator
13
 
14
  from ..auth import UserInfo
15
- from ..embeddings.vector_store import VectorStore
16
  from ..schema import VALUE_KEY, Bin, DataType, Path, PathTuple, Schema, normalize_path
17
  from ..signals.signal import Signal, TextEmbeddingSignal, get_signal_by_type, resolve_signal
18
  from ..tasks import TaskStepId
@@ -165,7 +165,8 @@ ColumnId = Union[Path, Column]
165
 
166
  class DatasetUISettings(BaseModel):
167
  """The UI persistent settings for a dataset."""
168
- media_paths: list[PathTuple] = []
 
169
 
170
 
171
  class DatasetSettings(BaseModel):
@@ -282,9 +283,8 @@ class Dataset(abc.ABC):
282
  pass
283
 
284
  @abc.abstractmethod
285
- def get_vector_store(self, embedding: str, path: PathTuple) -> VectorStore:
286
- # TODO: Instead of this, allow selecting vectors via select_rows.
287
- """Get the vector store for a column."""
288
  pass
289
 
290
  @abc.abstractmethod
@@ -462,9 +462,9 @@ def default_settings(dataset: Dataset) -> DatasetSettings:
462
  stats: list[StatsResult] = list(pool.map(lambda leaf: dataset.stats(leaf), leaf_paths))
463
  sorted_stats = sorted([stat for stat in stats if stat.avg_text_length],
464
  key=lambda stat: stat.avg_text_length or -1.0)
465
- media_paths = []
466
  if sorted_stats:
467
- media_paths = [sorted_stats[-1].path]
468
 
469
  return DatasetSettings(ui=DatasetUISettings(media_paths=media_paths))
470
 
 
12
  from pydantic import StrictBool, StrictBytes, StrictFloat, StrictInt, StrictStr, validator
13
 
14
  from ..auth import UserInfo
15
+ from ..embeddings.vector_store import VectorDBIndex
16
  from ..schema import VALUE_KEY, Bin, DataType, Path, PathTuple, Schema, normalize_path
17
  from ..signals.signal import Signal, TextEmbeddingSignal, get_signal_by_type, resolve_signal
18
  from ..tasks import TaskStepId
 
165
 
166
  class DatasetUISettings(BaseModel):
167
  """The UI persistent settings for a dataset."""
168
+ media_paths: set[PathTuple] = set()
169
+ markdown_paths: set[PathTuple] = set()
170
 
171
 
172
  class DatasetSettings(BaseModel):
 
283
  pass
284
 
285
  @abc.abstractmethod
286
+ def get_vector_db_index(self, embedding: str, path: PathTuple) -> VectorDBIndex:
287
+ """Get the vector index for a path and an embedding."""
 
288
  pass
289
 
290
  @abc.abstractmethod
 
462
  stats: list[StatsResult] = list(pool.map(lambda leaf: dataset.stats(leaf), leaf_paths))
463
  sorted_stats = sorted([stat for stat in stats if stat.avg_text_length],
464
  key=lambda stat: stat.avg_text_length or -1.0)
465
+ media_paths: set[PathTuple] = set()
466
  if sorted_stats:
467
+ media_paths = set([sorted_stats[-1].path])
468
 
469
  return DatasetSettings(ui=DatasetUISettings(media_paths=media_paths))
470
 
lilac/data/dataset_duckdb.py CHANGED
@@ -19,7 +19,7 @@ from typing_extensions import override
19
  from ..auth import UserInfo
20
  from ..concepts.concept import ConceptColumnInfo
21
  from ..config import data_path, env
22
- from ..embeddings.vector_store import VectorStore
23
  from ..embeddings.vector_store_numpy import NumpyVectorStore
24
  from ..schema import (
25
  MANIFEST_FILENAME,
@@ -33,32 +33,31 @@ from ..schema import (
33
  Field,
34
  Item,
35
  Path,
 
36
  PathTuple,
37
  RichData,
38
  Schema,
39
- SignalInputType,
40
  SourceManifest,
41
- VectorKey,
42
  column_paths_match,
43
  is_float,
44
  is_integer,
45
  is_ordinal,
46
  is_temporal,
47
  normalize_path,
48
- signal_compute_type_supports_dtype,
49
  )
50
  from ..signals.concept_labels import ConceptLabelsSignal
51
  from ..signals.concept_scorer import ConceptScoreSignal
52
  from ..signals.semantic_similarity import SemanticSimilaritySignal
53
  from ..signals.signal import (
54
- EMBEDDING_KEY,
55
  Signal,
56
- TextEmbeddingModelSignal,
57
  TextEmbeddingSignal,
 
 
58
  resolve_signal,
59
  )
60
  from ..signals.substring_search import SubstringSignal
61
- from ..tasks import TaskStepId, TaskStepInfo, progress, set_worker_steps
62
  from ..utils import DebugTimer, get_dataset_output_dir, log, open_file
63
  from . import dataset
64
  from .dataset import (
@@ -95,13 +94,12 @@ from .dataset_utils import (
95
  flatten,
96
  flatten_keys,
97
  merge_schemas,
98
- read_embedding_index,
99
- replace_embeddings_with_none,
100
  schema_contains_path,
101
  sparse_to_dense_compute,
102
  unflatten,
103
  wrap_in_dicts,
104
- write_item_embeddings_to_disk,
105
  write_items_to_parquet,
106
  )
107
 
@@ -156,8 +154,8 @@ class DatasetDuckDB(Dataset):
156
  self._signal_manifests: list[SignalManifest] = []
157
  self.con = duckdb.connect(database=':memory:')
158
 
159
- # Maps a column path and embedding to the vector store. This is lazily generated as needed.
160
- self._col_vector_stores: dict[PathTuple, VectorStore] = {}
161
  self.vector_store_cls = vector_store_cls
162
  self._manifest_lock = threading.Lock()
163
 
@@ -196,7 +194,8 @@ class DatasetDuckDB(Dataset):
196
  signal_manifest = SignalManifest.parse_raw(f.read())
197
  self._signal_manifests.append(signal_manifest)
198
  signal_files = [os.path.join(root, f) for f in signal_manifest.files]
199
- self._create_view(signal_manifest.parquet_id, signal_files)
 
200
 
201
  merged_schema = merge_schemas([self._source_manifest.data_schema] +
202
  [m.data_schema for m in self._signal_manifests])
@@ -212,10 +211,13 @@ class DatasetDuckDB(Dataset):
212
  # NOTE: "root_column" for each signal is defined as the top-level column.
213
  select_sql = ', '.join([f'{SOURCE_VIEW_NAME}.*'] + [(
214
  f'{_escape_col_name(manifest.parquet_id)}.{_escape_col_name(_root_column(manifest))} '
215
- f'AS {_escape_col_name(manifest.parquet_id)}') for manifest in self._signal_manifests])
 
 
216
  join_sql = ' '.join([SOURCE_VIEW_NAME] + [
217
  f'join {_escape_col_name(manifest.parquet_id)} using ({UUID_COLUMN},)'
218
  for manifest in self._signal_manifests
 
219
  ])
220
  view_or_table = 'TABLE'
221
  use_views = env('DUCKDB_USE_VIEWS', 0) or 0
@@ -267,105 +269,42 @@ class DatasetDuckDB(Dataset):
267
  raise NotImplementedError('count is not yet implemented for DuckDB.')
268
 
269
  @override
270
- def get_vector_store(self, embedding: str, path: PathTuple) -> VectorStore:
271
  # Refresh the manifest to make sure we have the latest signal manifests.
272
  self.manifest()
 
 
 
273
 
274
- if path[-1] != EMBEDDING_KEY:
275
- path = (*path, embedding, PATH_WILDCARD, EMBEDDING_KEY)
276
-
277
- if path not in self._col_vector_stores:
278
- manifests = [
279
- m for m in self._signal_manifests
280
- if schema_contains_path(m.data_schema, path) and m.embedding_filename_prefix
281
- ]
282
- if not manifests:
283
- raise ValueError(f'No embedding found for path {path}.')
284
- if len(manifests) > 1:
285
- raise ValueError(f'Multiple embeddings found for path {path}. Got: {manifests}')
286
- manifest = manifests[0]
287
- if not manifest.embedding_filename_prefix:
288
- raise ValueError(f'Signal manifest for path {path} is not an embedding. '
289
- f'Got signal manifest: {manifest}')
290
-
291
- signal_name = cast(str, manifest.signal.signal_name)
292
- filepath_prefix = os.path.join(self.dataset_path, _signal_dir(manifest.enriched_path),
293
- signal_name, manifest.embedding_filename_prefix)
294
- keys, embeddings = read_embedding_index(filepath_prefix)
295
- # Get all the embeddings and pass it to the vector store.
296
- vector_store = self.vector_store_cls()
297
- vector_store.add(keys, embeddings)
298
- # Cache the vector store.
299
- self._col_vector_stores[path] = vector_store
300
-
301
- return self._col_vector_stores[path]
302
-
303
- def _prepare_signal(
304
- self,
305
- signal: Signal,
306
- source_path: PathTuple,
307
- manifest: DatasetManifest,
308
- compute_dependencies: Optional[bool] = False,
309
- task_step_id: Optional[TaskStepId] = None) -> tuple[PathTuple, Optional[TaskStepId]]:
310
- """Run all the signals dependencies required to run this signal.
311
-
312
- Args:
313
- signal: The signal to prepare.
314
- source_path: The source path the signal is running over.
315
- compute_dependencies: If True, signals will get computed for the whole column. If False,
316
- throw if the required inputs are not computed yet.
317
- task_step_id: The TaskStepId used to run the signal.
318
-
319
- Returns
320
- The final path the signal will be run over and the new step id for the final signal.
321
- """
322
- is_value_path = False
323
- if source_path[-1] == VALUE_KEY:
324
- is_value_path = True
325
- source_path = source_path[:-1]
326
-
327
- new_path = source_path
328
-
329
- signals_to_compute: list[tuple[PathTuple, Signal]] = []
330
- if isinstance(signal, TextEmbeddingModelSignal):
331
- embedding_signal = signal.get_embedding_signal()
332
- new_path = (*new_path, embedding_signal.key(), PATH_WILDCARD, EMBEDDING_KEY)
333
- if new_path not in manifest.data_schema.leafs:
334
- if not compute_dependencies:
335
- raise ValueError(f'Embedding signal "{embedding_signal.key()}" is not computed over '
336
- f'{source_path}. Please run `dataset.compute_signal` over '
337
- f'{source_path} first.')
338
- signals_to_compute.append((new_path, embedding_signal))
339
-
340
- new_steps = len(signals_to_compute)
341
- # Setup the task steps so the task progress indicator knows the number of steps before they are
342
- # computed.
343
- task_id: Optional[str] = None
344
- step_id: Optional[int] = None
345
- if task_step_id:
346
- (task_id, step_id) = task_step_id
347
- if task_id != '' and new_steps:
348
- # Make a step for the parent.
349
- set_worker_steps(task_id, [TaskStepInfo()] * (new_steps + 1))
350
-
351
- for i, (new_path, signal) in enumerate(signals_to_compute):
352
- if new_path not in manifest.data_schema.leafs:
353
- self.compute_signal(
354
- signal, source_path, task_step_id=(task_id, i) if task_id is not None else None)
355
-
356
- if is_value_path:
357
- new_path = (*new_path, VALUE_KEY)
358
-
359
- new_task_id: Optional[TaskStepId] = None
360
- if task_id is not None and step_id is not None:
361
- new_task_id = (task_id, step_id + new_steps)
362
- return (new_path, new_task_id)
363
 
364
  @override
365
  def compute_signal(self,
366
  signal: Signal,
367
  leaf_path: Path,
368
  task_step_id: Optional[TaskStepId] = None) -> None:
 
 
369
  source_path = normalize_path(leaf_path)
370
  manifest = self.manifest()
371
 
@@ -373,10 +312,6 @@ class DatasetDuckDB(Dataset):
373
  # Make a dummy task step so we report progress via tqdm.
374
  task_step_id = ('', 0)
375
 
376
- # Prepare the dependencies of this signal.
377
- signal_source_path, task_step_id = self._prepare_signal(
378
- signal, source_path, manifest, compute_dependencies=True, task_step_id=task_step_id)
379
-
380
  # The manifest may have changed after computing the dependencies.
381
  manifest = self.manifest()
382
 
@@ -392,9 +327,6 @@ class DatasetDuckDB(Dataset):
392
  df = select_rows_result.df()
393
  values = df['value']
394
 
395
- source_path = signal_source_path
396
- signal_col.path = source_path
397
-
398
  enriched_path = _col_destination_path(signal_col, is_computed_signal=True)
399
  spec = _split_path_into_subpaths_of_lists(enriched_path)
400
  output_dir = os.path.join(self.dataset_path, _signal_dir(enriched_path))
@@ -403,20 +335,6 @@ class DatasetDuckDB(Dataset):
403
  for uuid, item in zip(df[UUID_COLUMN], enriched_signal_items):
404
  item[UUID_COLUMN] = uuid
405
 
406
- is_embedding = isinstance(signal, TextEmbeddingSignal)
407
- embedding_filename_prefix = None
408
- if is_embedding:
409
- embedding_filename_prefix = os.path.basename(
410
- write_item_embeddings_to_disk(
411
- keys=df[UUID_COLUMN],
412
- embeddings=values,
413
- output_dir=output_dir,
414
- shard_index=0,
415
- num_shards=1))
416
-
417
- # Replace the embeddings with None so they are not serialized in the parquet file.
418
- enriched_signal_items = (replace_embeddings_with_none(item) for item in enriched_signal_items)
419
-
420
  enriched_signal_items = list(enriched_signal_items)
421
  parquet_filename, _ = write_items_to_parquet(
422
  items=enriched_signal_items,
@@ -431,12 +349,55 @@ class DatasetDuckDB(Dataset):
431
  data_schema=signal_schema,
432
  signal=signal,
433
  enriched_path=source_path,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  parquet_id=make_parquet_id(signal, source_path, is_computed_signal=True),
435
  embedding_filename_prefix=embedding_filename_prefix)
436
  signal_manifest_filepath = os.path.join(output_dir, SIGNAL_MANIFEST_FILENAME)
 
437
  with open_file(signal_manifest_filepath, 'w') as f:
438
  f.write(signal_manifest.json(exclude_none=True, indent=2))
439
- log(f'Wrote signal output to {output_dir}')
440
 
441
  @override
442
  def delete_signal(self, signal_path: Path) -> None:
@@ -493,10 +454,9 @@ class DatasetDuckDB(Dataset):
493
 
494
  # Signal transforms must have the same dtype as the leaf field.
495
  signal = cast(Signal, col.signal_udf)
496
- compute_type = signal.compute_type
497
- if not signal_compute_type_supports_dtype(compute_type, leaf.dtype):
498
  raise ValueError(f'Leaf "{path}" has dtype "{leaf.dtype}" which is not supported '
499
- f'by "{signal.key()}" with signal input type "{compute_type}".')
500
 
501
  def _validate_selection(self, columns: Sequence[Column], select_schema: Schema) -> None:
502
  # Validate all the columns and make sure they exist in the `select_schema`.
@@ -728,8 +688,7 @@ class DatasetDuckDB(Dataset):
728
  if not udf_cols_to_sort_by:
729
  return None
730
  udf_col = udf_cols_to_sort_by[0]
731
- if udf_col.signal_udf and (udf_col.signal_udf.compute_type
732
- not in [SignalInputType.TEXT_EMBEDDING]):
733
  return None
734
  return udf_col
735
 
@@ -801,14 +760,6 @@ class DatasetDuckDB(Dataset):
801
  if (UUID_COLUMN,) not in col_paths:
802
  cols.append(column_from_identifier(UUID_COLUMN))
803
 
804
- # Prepare UDF columns. Throw an error if they are not computed. Update the paths of the UDFs so
805
- # they match the paths of the columns defined by splits and embeddings.
806
- for col in cols:
807
- if col.signal_udf:
808
- # Do not auto-compute dependencies, throw an error if they are not computed.
809
- col.path, _ = self._prepare_signal(
810
- col.signal_udf, col.path, manifest, compute_dependencies=False)
811
-
812
  schema = manifest.data_schema
813
 
814
  if combine_columns:
@@ -825,9 +776,8 @@ class DatasetDuckDB(Dataset):
825
  for udf_col in udf_columns:
826
  if isinstance(udf_col.signal_udf, ConceptScoreSignal):
827
  # Set dataset information on the signal.
828
- source_path = udf_col.path if udf_col.path[-1] != EMBEDDING_KEY else udf_col.path[:-3]
829
  udf_col.signal_udf.set_column_info(
830
- ConceptColumnInfo(namespace=self.namespace, name=self.dataset_name, path=source_path))
831
 
832
  if isinstance(udf_col.signal_udf, (ConceptScoreSignal, ConceptLabelsSignal)):
833
  # Concept are access controlled so we tell it about the user.
@@ -863,19 +813,20 @@ class DatasetDuckDB(Dataset):
863
 
864
  topk_udf_col = self._topk_udf_to_sort_by(udf_columns, sort_by, limit, sort_order)
865
  if topk_udf_col:
866
- key_prefixes: Optional[Iterable[VectorKey]] = None
867
  if where_query:
868
  # If there are filters, we need to send UUIDs to the top k query.
869
  df = con.execute(f'SELECT {UUID_COLUMN} FROM t {where_query}').df()
870
  total_num_rows = len(df)
871
- key_prefixes = df[UUID_COLUMN]
 
872
 
873
- topk_signal = cast(TextEmbeddingModelSignal, topk_udf_col.signal_udf)
874
  # The input is an embedding.
875
- vector_store = self.get_vector_store(topk_signal.embedding, topk_udf_col.path)
876
  k = (limit or 0) + (offset or 0)
877
- topk = topk_signal.vector_compute_topk(k, vector_store, key_prefixes)
878
- topk_uuids = list(dict.fromkeys([cast(str, key[0]) for key, _ in topk]))
879
 
880
  # Ignore all the other filters and filter DuckDB results only by the top k UUIDs.
881
  uuid_filter = Filter(path=(UUID_COLUMN,), op=ListOp.IN, value=topk_uuids)
@@ -995,13 +946,12 @@ class DatasetDuckDB(Dataset):
995
  with DebugTimer(f'Computing signal "{signal.signal_name}"'):
996
  signal.setup()
997
 
998
- if signal.compute_type in [SignalInputType.TEXT_EMBEDDING]:
999
- # The input is an embedding.
1000
- embedding_signal = cast(TextEmbeddingModelSignal, signal)
1001
- vector_store = self.get_vector_store(embedding_signal.embedding, udf_col.path)
1002
  flat_keys = list(flatten_keys(df[UUID_COLUMN], input))
1003
  signal_out = sparse_to_dense_compute(
1004
- iter(flat_keys), lambda keys: signal.vector_compute(keys, vector_store))
1005
  # Add progress.
1006
  if task_step_id is not None:
1007
  signal_out = progress(
@@ -1115,14 +1065,6 @@ class DatasetDuckDB(Dataset):
1115
  if (UUID_COLUMN,) not in col_paths:
1116
  cols.append(column_from_identifier(UUID_COLUMN))
1117
 
1118
- # Prepare UDF columns. Throw an error if they are not computed. Update the paths of the UDFs so
1119
- # they match the paths of the columns defined by splits and embeddings.
1120
- for col in cols:
1121
- if col.signal_udf:
1122
- # Do not auto-compute dependencies, throw an error if they are not computed.
1123
- col.path, _ = self._prepare_signal(
1124
- col.signal_udf, col.path, manifest, compute_dependencies=False)
1125
-
1126
  self._normalize_searches(searches, manifest)
1127
  search_udfs = self._search_udfs(searches, manifest)
1128
  cols.extend([search_udf.udf for search_udf in search_udfs])
@@ -1188,6 +1130,8 @@ class DatasetDuckDB(Dataset):
1188
  select_leaf = select_leaf or column.signal_udf is not None
1189
 
1190
  for m in parquet_manifests:
 
 
1191
  # Skip this parquet file if it doesn't contain the path.
1192
  if not schema_contains_path(m.data_schema, path):
1193
  continue
@@ -1284,9 +1228,8 @@ class DatasetDuckDB(Dataset):
1284
  if not embedding:
1285
  raise ValueError(f'Please provide an embedding for semantic search. Got search: {search}')
1286
 
1287
- embedding_path = (*search_path, embedding, PATH_WILDCARD, EMBEDDING_KEY)
1288
  try:
1289
- manifest.data_schema.get_field(embedding_path)
1290
  except Exception as e:
1291
  raise ValueError(
1292
  f'Embedding {embedding} has not been computed. '
@@ -1314,7 +1257,7 @@ class DatasetDuckDB(Dataset):
1314
  output_path=_col_destination_path(concept_labels_udf),
1315
  sort=None))
1316
 
1317
- udf = Column(path=embedding_path, signal_udf=search_signal)
1318
 
1319
  output_path = _col_destination_path(udf)
1320
  search_udfs.append(
@@ -1373,7 +1316,7 @@ class DatasetDuckDB(Dataset):
1373
  sql_op = BINARY_OP_TO_SQL[cast(BinaryOp, f.op)]
1374
  filter_val = cast(FeatureValue, f.value)
1375
  if isinstance(filter_val, str):
1376
- filter_val = f"'{filter_val}'"
1377
  elif isinstance(filter_val, bytes):
1378
  filter_val = _bytes_to_blob_literal(filter_val)
1379
  else:
 
19
  from ..auth import UserInfo
20
  from ..concepts.concept import ConceptColumnInfo
21
  from ..config import data_path, env
22
+ from ..embeddings.vector_store import VectorDBIndex, VectorStore
23
  from ..embeddings.vector_store_numpy import NumpyVectorStore
24
  from ..schema import (
25
  MANIFEST_FILENAME,
 
33
  Field,
34
  Item,
35
  Path,
36
+ PathKey,
37
  PathTuple,
38
  RichData,
39
  Schema,
 
40
  SourceManifest,
 
41
  column_paths_match,
42
  is_float,
43
  is_integer,
44
  is_ordinal,
45
  is_temporal,
46
  normalize_path,
47
+ signal_type_supports_dtype,
48
  )
49
  from ..signals.concept_labels import ConceptLabelsSignal
50
  from ..signals.concept_scorer import ConceptScoreSignal
51
  from ..signals.semantic_similarity import SemanticSimilaritySignal
52
  from ..signals.signal import (
 
53
  Signal,
 
54
  TextEmbeddingSignal,
55
+ VectorSignal,
56
+ get_signal_by_type,
57
  resolve_signal,
58
  )
59
  from ..signals.substring_search import SubstringSignal
60
+ from ..tasks import TaskStepId, progress
61
  from ..utils import DebugTimer, get_dataset_output_dir, log, open_file
62
  from . import dataset
63
  from .dataset import (
 
94
  flatten,
95
  flatten_keys,
96
  merge_schemas,
97
+ read_embeddings_from_disk,
 
98
  schema_contains_path,
99
  sparse_to_dense_compute,
100
  unflatten,
101
  wrap_in_dicts,
102
+ write_embeddings_to_disk,
103
  write_items_to_parquet,
104
  )
105
 
 
154
  self._signal_manifests: list[SignalManifest] = []
155
  self.con = duckdb.connect(database=':memory:')
156
 
157
+ # Maps a path and embedding to the vector index. This is lazily generated as needed.
158
+ self._vector_indices: dict[tuple[PathKey, str], VectorDBIndex] = {}
159
  self.vector_store_cls = vector_store_cls
160
  self._manifest_lock = threading.Lock()
161
 
 
194
  signal_manifest = SignalManifest.parse_raw(f.read())
195
  self._signal_manifests.append(signal_manifest)
196
  signal_files = [os.path.join(root, f) for f in signal_manifest.files]
197
+ if signal_files:
198
+ self._create_view(signal_manifest.parquet_id, signal_files)
199
 
200
  merged_schema = merge_schemas([self._source_manifest.data_schema] +
201
  [m.data_schema for m in self._signal_manifests])
 
211
  # NOTE: "root_column" for each signal is defined as the top-level column.
212
  select_sql = ', '.join([f'{SOURCE_VIEW_NAME}.*'] + [(
213
  f'{_escape_col_name(manifest.parquet_id)}.{_escape_col_name(_root_column(manifest))} '
214
+ f'AS {_escape_col_name(manifest.parquet_id)}')
215
+ for manifest in self._signal_manifests
216
+ if manifest.files])
217
  join_sql = ' '.join([SOURCE_VIEW_NAME] + [
218
  f'join {_escape_col_name(manifest.parquet_id)} using ({UUID_COLUMN},)'
219
  for manifest in self._signal_manifests
220
+ if manifest.files
221
  ])
222
  view_or_table = 'TABLE'
223
  use_views = env('DUCKDB_USE_VIEWS', 0) or 0
 
269
  raise NotImplementedError('count is not yet implemented for DuckDB.')
270
 
271
  @override
272
+ def get_vector_db_index(self, embedding: str, path: PathTuple) -> VectorDBIndex:
273
  # Refresh the manifest to make sure we have the latest signal manifests.
274
  self.manifest()
275
+ index_key = (path, embedding)
276
+ if index_key in self._vector_indices:
277
+ return self._vector_indices[index_key]
278
 
279
+ manifests = [
280
+ m for m in self._signal_manifests
281
+ if schema_contains_path(m.data_schema, path) and m.embedding_filename_prefix
282
+ ]
283
+ if not manifests:
284
+ raise ValueError(f'No embedding found for path {path}.')
285
+ if len(manifests) > 1:
286
+ raise ValueError(f'Multiple embeddings found for path {path}. Got: {manifests}')
287
+ manifest = manifests[0]
288
+ if not manifest.embedding_filename_prefix:
289
+ raise ValueError(f'Signal manifest for path {path} is not an embedding. '
290
+ f'Got signal manifest: {manifest}')
291
+
292
+ signal_name = cast(str, manifest.signal.signal_name)
293
+ filepath_prefix = os.path.join(self.dataset_path, _signal_dir(manifest.enriched_path),
294
+ signal_name, manifest.embedding_filename_prefix)
295
+ spans, embeddings = read_embeddings_from_disk(filepath_prefix)
296
+ vector_index = VectorDBIndex(self.vector_store_cls, spans, embeddings)
297
+ # Cache the vector index.
298
+ self._vector_indices[index_key] = vector_index
299
+ return vector_index
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
  @override
302
  def compute_signal(self,
303
  signal: Signal,
304
  leaf_path: Path,
305
  task_step_id: Optional[TaskStepId] = None) -> None:
306
+ if isinstance(signal, TextEmbeddingSignal):
307
+ return self.compute_embedding(signal.name, leaf_path, task_step_id)
308
  source_path = normalize_path(leaf_path)
309
  manifest = self.manifest()
310
 
 
312
  # Make a dummy task step so we report progress via tqdm.
313
  task_step_id = ('', 0)
314
 
 
 
 
 
315
  # The manifest may have changed after computing the dependencies.
316
  manifest = self.manifest()
317
 
 
327
  df = select_rows_result.df()
328
  values = df['value']
329
 
 
 
 
330
  enriched_path = _col_destination_path(signal_col, is_computed_signal=True)
331
  spec = _split_path_into_subpaths_of_lists(enriched_path)
332
  output_dir = os.path.join(self.dataset_path, _signal_dir(enriched_path))
 
335
  for uuid, item in zip(df[UUID_COLUMN], enriched_signal_items):
336
  item[UUID_COLUMN] = uuid
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  enriched_signal_items = list(enriched_signal_items)
339
  parquet_filename, _ = write_items_to_parquet(
340
  items=enriched_signal_items,
 
349
  data_schema=signal_schema,
350
  signal=signal,
351
  enriched_path=source_path,
352
+ parquet_id=make_parquet_id(signal, source_path, is_computed_signal=True))
353
+ signal_manifest_filepath = os.path.join(output_dir, SIGNAL_MANIFEST_FILENAME)
354
+ with open_file(signal_manifest_filepath, 'w') as f:
355
+ f.write(signal_manifest.json(exclude_none=True, indent=2))
356
+ log(f'Wrote signal output to {output_dir}')
357
+
358
+ @override
359
+ def compute_embedding(self,
360
+ embedding: str,
361
+ leaf_path: Path,
362
+ task_step_id: Optional[TaskStepId] = None) -> None:
363
+ source_path = normalize_path(leaf_path)
364
+ manifest = self.manifest()
365
+
366
+ if task_step_id is None:
367
+ # Make a dummy task step so we report progress via tqdm.
368
+ task_step_id = ('', 0)
369
+
370
+ signal = get_signal_by_type(embedding, TextEmbeddingSignal)()
371
+ signal_col = Column(path=source_path, alias='value', signal_udf=signal)
372
+ select_rows_result = self.select_rows([signal_col],
373
+ task_step_id=task_step_id,
374
+ resolve_span=True)
375
+ df = select_rows_result.df()
376
+ values = df['value']
377
+
378
+ enriched_path = _col_destination_path(signal_col, is_computed_signal=True)
379
+ output_dir = os.path.join(self.dataset_path, _signal_dir(enriched_path))
380
+ signal_schema = create_signal_schema(signal, source_path, manifest.data_schema)
381
+ embedding_filename_prefix = os.path.basename(
382
+ write_embeddings_to_disk(
383
+ uuids=df[UUID_COLUMN],
384
+ signal_items=values,
385
+ output_dir=output_dir,
386
+ shard_index=0,
387
+ num_shards=1))
388
+
389
+ signal_manifest = SignalManifest(
390
+ files=[],
391
+ data_schema=signal_schema,
392
+ signal=signal,
393
+ enriched_path=source_path,
394
  parquet_id=make_parquet_id(signal, source_path, is_computed_signal=True),
395
  embedding_filename_prefix=embedding_filename_prefix)
396
  signal_manifest_filepath = os.path.join(output_dir, SIGNAL_MANIFEST_FILENAME)
397
+
398
  with open_file(signal_manifest_filepath, 'w') as f:
399
  f.write(signal_manifest.json(exclude_none=True, indent=2))
400
+ log(f'Wrote embedding index to {output_dir}')
401
 
402
  @override
403
  def delete_signal(self, signal_path: Path) -> None:
 
454
 
455
  # Signal transforms must have the same dtype as the leaf field.
456
  signal = cast(Signal, col.signal_udf)
457
+ if not signal_type_supports_dtype(signal.input_type, leaf.dtype):
 
458
  raise ValueError(f'Leaf "{path}" has dtype "{leaf.dtype}" which is not supported '
459
+ f'by "{signal.key()}" with signal input type "{signal.input_type}".')
460
 
461
  def _validate_selection(self, columns: Sequence[Column], select_schema: Schema) -> None:
462
  # Validate all the columns and make sure they exist in the `select_schema`.
 
688
  if not udf_cols_to_sort_by:
689
  return None
690
  udf_col = udf_cols_to_sort_by[0]
691
+ if udf_col.signal_udf and not isinstance(udf_col.signal_udf, VectorSignal):
 
692
  return None
693
  return udf_col
694
 
 
760
  if (UUID_COLUMN,) not in col_paths:
761
  cols.append(column_from_identifier(UUID_COLUMN))
762
 
 
 
 
 
 
 
 
 
763
  schema = manifest.data_schema
764
 
765
  if combine_columns:
 
776
  for udf_col in udf_columns:
777
  if isinstance(udf_col.signal_udf, ConceptScoreSignal):
778
  # Set dataset information on the signal.
 
779
  udf_col.signal_udf.set_column_info(
780
+ ConceptColumnInfo(namespace=self.namespace, name=self.dataset_name, path=udf_col.path))
781
 
782
  if isinstance(udf_col.signal_udf, (ConceptScoreSignal, ConceptLabelsSignal)):
783
  # Concept are access controlled so we tell it about the user.
 
813
 
814
  topk_udf_col = self._topk_udf_to_sort_by(udf_columns, sort_by, limit, sort_order)
815
  if topk_udf_col:
816
+ path_keys: Optional[Iterable[PathKey]] = None
817
  if where_query:
818
  # If there are filters, we need to send UUIDs to the top k query.
819
  df = con.execute(f'SELECT {UUID_COLUMN} FROM t {where_query}').df()
820
  total_num_rows = len(df)
821
+ # Convert UUIDs to path keys.
822
+ path_keys = [(uuid,) for uuid in df[UUID_COLUMN]]
823
 
824
+ topk_signal = cast(VectorSignal, topk_udf_col.signal_udf)
825
  # The input is an embedding.
826
+ vector_index = self.get_vector_db_index(topk_signal.embedding, topk_udf_col.path)
827
  k = (limit or 0) + (offset or 0)
828
+ topk = topk_signal.vector_compute_topk(k, vector_index, path_keys)
829
+ topk_uuids = list(dict.fromkeys([cast(str, path_key[0]) for path_key, _ in topk]))
830
 
831
  # Ignore all the other filters and filter DuckDB results only by the top k UUIDs.
832
  uuid_filter = Filter(path=(UUID_COLUMN,), op=ListOp.IN, value=topk_uuids)
 
946
  with DebugTimer(f'Computing signal "{signal.signal_name}"'):
947
  signal.setup()
948
 
949
+ if isinstance(signal, VectorSignal):
950
+ embedding_signal = signal
951
+ vector_store = self.get_vector_db_index(embedding_signal.embedding, udf_col.path)
 
952
  flat_keys = list(flatten_keys(df[UUID_COLUMN], input))
953
  signal_out = sparse_to_dense_compute(
954
+ iter(flat_keys), lambda keys: embedding_signal.vector_compute(keys, vector_store))
955
  # Add progress.
956
  if task_step_id is not None:
957
  signal_out = progress(
 
1065
  if (UUID_COLUMN,) not in col_paths:
1066
  cols.append(column_from_identifier(UUID_COLUMN))
1067
 
 
 
 
 
 
 
 
 
1068
  self._normalize_searches(searches, manifest)
1069
  search_udfs = self._search_udfs(searches, manifest)
1070
  cols.extend([search_udf.udf for search_udf in search_udfs])
 
1130
  select_leaf = select_leaf or column.signal_udf is not None
1131
 
1132
  for m in parquet_manifests:
1133
+ if not m.files:
1134
+ continue
1135
  # Skip this parquet file if it doesn't contain the path.
1136
  if not schema_contains_path(m.data_schema, path):
1137
  continue
 
1228
  if not embedding:
1229
  raise ValueError(f'Please provide an embedding for semantic search. Got search: {search}')
1230
 
 
1231
  try:
1232
+ manifest.data_schema.get_field((*search_path, embedding))
1233
  except Exception as e:
1234
  raise ValueError(
1235
  f'Embedding {embedding} has not been computed. '
 
1257
  output_path=_col_destination_path(concept_labels_udf),
1258
  sort=None))
1259
 
1260
+ udf = Column(path=search_path, signal_udf=search_signal)
1261
 
1262
  output_path = _col_destination_path(udf)
1263
  search_udfs.append(
 
1316
  sql_op = BINARY_OP_TO_SQL[cast(BinaryOp, f.op)]
1317
  filter_val = cast(FeatureValue, f.value)
1318
  if isinstance(filter_val, str):
1319
+ filter_val = _escape_string_literal(filter_val)
1320
  elif isinstance(filter_val, bytes):
1321
  filter_val = _bytes_to_blob_literal(filter_val)
1322
  else:
lilac/data/dataset_test_utils.py CHANGED
@@ -4,8 +4,10 @@ import pathlib
4
  from datetime import datetime
5
  from typing import Optional, Type, cast
6
 
 
7
  from typing_extensions import Protocol
8
 
 
9
  from ..schema import (
10
  MANIFEST_FILENAME,
11
  PARQUET_FILENAME_PREFIX,
@@ -13,14 +15,13 @@ from ..schema import (
13
  DataType,
14
  Field,
15
  Item,
 
16
  Schema,
17
  SourceManifest,
18
- field,
19
  )
20
- from ..signals.signal import EMBEDDING_KEY
21
  from ..utils import get_dataset_output_dir, open_file
22
  from .dataset import Dataset
23
- from .dataset_utils import is_primitive, lilac_span, write_items_to_parquet
24
 
25
  TEST_NAMESPACE = 'test_namespace'
26
  TEST_DATASET_NAME = 'test_dataset'
@@ -109,11 +110,16 @@ def enriched_item(value: Optional[Item] = None, metadata: dict[str, Item] = {})
109
  return {VALUE_KEY: value, **metadata}
110
 
111
 
112
- def enriched_embedding_span(start: int, end: int, metadata: dict[str, Item] = {}) -> Item:
113
- """Makes an item that represents an embedding span that was enriched with metadata."""
114
- return lilac_span(start, end, {EMBEDDING_KEY: {VALUE_KEY: None, **metadata}})
 
 
 
 
 
 
 
 
115
 
116
-
117
- def enriched_embedding_span_field(metadata: Optional[object] = {}) -> Field:
118
- """Makes a field that represents an embedding span that was enriched with metadata."""
119
- return field('string_span', fields={EMBEDDING_KEY: field('embedding', fields=metadata)})
 
4
  from datetime import datetime
5
  from typing import Optional, Type, cast
6
 
7
+ import numpy as np
8
  from typing_extensions import Protocol
9
 
10
+ from ..embeddings.vector_store import VectorDBIndex, VectorStore
11
  from ..schema import (
12
  MANIFEST_FILENAME,
13
  PARQUET_FILENAME_PREFIX,
 
15
  DataType,
16
  Field,
17
  Item,
18
+ PathKey,
19
  Schema,
20
  SourceManifest,
 
21
  )
 
22
  from ..utils import get_dataset_output_dir, open_file
23
  from .dataset import Dataset
24
+ from .dataset_utils import is_primitive, write_items_to_parquet
25
 
26
  TEST_NAMESPACE = 'test_namespace'
27
  TEST_DATASET_NAME = 'test_dataset'
 
110
  return {VALUE_KEY: value, **metadata}
111
 
112
 
113
+ def make_vector_index(vector_store_cls: Type[VectorStore],
114
+ vector_dict: dict[PathKey, list[list[float]]]) -> VectorDBIndex:
115
+ """Make a vector index from a dictionary of vector keys to vectors."""
116
+ embeddings: list[np.ndarray] = []
117
+ spans: list[tuple[PathKey, list[tuple[int, int]]]] = []
118
+ for path_key, vectors in vector_dict.items():
119
+ vector_spans: list[tuple[int, int]] = []
120
+ for i, vector in enumerate(vectors):
121
+ embeddings.append(np.array(vector))
122
+ vector_spans.append((0, 0))
123
+ spans.append((path_key, vector_spans))
124
 
125
+ return VectorDBIndex(vector_store_cls, spans, np.array(embeddings))
 
 
 
lilac/data/dataset_utils.py CHANGED
@@ -22,6 +22,7 @@ from ..schema import (
22
  VALUE_KEY,
23
  Field,
24
  Item,
 
25
  PathTuple,
26
  Schema,
27
  VectorKey,
@@ -33,6 +34,7 @@ from ..signals.signal import EMBEDDING_KEY, Signal
33
  from ..utils import file_exists, log, open_file
34
 
35
  _KEYS_SUFFIX = '.keys.pkl'
 
36
  _EMBEDDINGS_SUFFIX = '.npy'
37
 
38
 
@@ -218,53 +220,58 @@ def create_signal_schema(signal: Signal, source_path: PathTuple, current_schema:
218
  return schema({UUID_COLUMN: 'string', **cast(dict, enriched_schema.fields)})
219
 
220
 
221
- def write_item_embeddings_to_disk(keys: Iterable[str], embeddings: Iterable[Item], output_dir: str,
222
- shard_index: int, num_shards: int) -> str:
223
  """Write a set of embeddings to disk."""
224
  output_path_prefix = embedding_index_filename_prefix(output_dir, shard_index, num_shards)
225
 
226
- # Restrict the keys to only those that are embeddings.
227
  def embedding_predicate(input: Any) -> bool:
228
- return isinstance(input, np.ndarray)
 
229
 
230
- flat_keys = flatten_keys(keys, embeddings, is_primitive_predicate=embedding_predicate)
231
- flat_embeddings = cast(Iterable[Item],
232
- flatten(embeddings, is_primitive_predicate=embedding_predicate))
233
 
234
  embedding_vectors: list[np.ndarray] = []
235
- embedding_keys: list[VectorKey] = []
236
- for key, lilac_embedding in zip(flat_keys, flat_embeddings):
237
- if not key or not lilac_embedding or EMBEDDING_KEY not in lilac_embedding:
238
  # Sparse embeddings may not have an embedding for every key.
239
  continue
240
 
241
- # We use squeeze here because embedding functions can return outer dimensions of 1.
242
- embedding_vectors.append(lilac_embedding[EMBEDDING_KEY].reshape(-1))
243
- embedding_keys.append(key)
 
 
 
 
 
244
 
245
  embedding_matrix = np.array(embedding_vectors)
246
-
247
  # Write the embedding index and the ordered UUID column to disk so they can be joined later.
248
 
249
  with open_file(output_path_prefix + _EMBEDDINGS_SUFFIX, 'wb') as f:
250
  np.save(cast(str, f), embedding_matrix, allow_pickle=False)
251
- with open_file(output_path_prefix + _KEYS_SUFFIX, 'wb') as f:
252
- pickle.dump(embedding_keys, f)
253
 
254
  return output_path_prefix
255
 
256
 
257
- def read_embedding_index(filepath_prefix: str) -> tuple[list[VectorKey], np.ndarray]:
258
- """Reads the embedding index for a column from disk."""
 
259
  if not file_exists(filepath_prefix + _EMBEDDINGS_SUFFIX):
260
  raise ValueError(F'Embedding index does not exist at path {filepath_prefix}. '
261
  'Please run dataset.compute_signal() on the embedding signal first.')
262
-
263
  # Read the embedding index from disk.
264
  embeddings = np.load(filepath_prefix + _EMBEDDINGS_SUFFIX, allow_pickle=False)
265
- with open_file(filepath_prefix + _KEYS_SUFFIX, 'rb') as f:
266
- index_keys: list[VectorKey] = pickle.load(f)
267
- return index_keys, embeddings
268
 
269
 
270
  def write_items_to_parquet(items: Iterable[Item], output_dir: str, schema: Schema,
 
22
  VALUE_KEY,
23
  Field,
24
  Item,
25
+ PathKey,
26
  PathTuple,
27
  Schema,
28
  VectorKey,
 
34
  from ..utils import file_exists, log, open_file
35
 
36
  _KEYS_SUFFIX = '.keys.pkl'
37
+ _SPANS_SUFFIX = '.spans.pkl'
38
  _EMBEDDINGS_SUFFIX = '.npy'
39
 
40
 
 
220
  return schema({UUID_COLUMN: 'string', **cast(dict, enriched_schema.fields)})
221
 
222
 
223
+ def write_embeddings_to_disk(uuids: Iterable[str], signal_items: Iterable[Item], output_dir: str,
224
+ shard_index: int, num_shards: int) -> str:
225
  """Write a set of embeddings to disk."""
226
  output_path_prefix = embedding_index_filename_prefix(output_dir, shard_index, num_shards)
227
 
228
+ # For each item, we have a list of embedding spans.
229
  def embedding_predicate(input: Any) -> bool:
230
+ return (isinstance(input, list) and len(input) > 0 and isinstance(input[0], dict) and
231
+ EMBEDDING_KEY in input[0])
232
 
233
+ path_keys = flatten_keys(uuids, signal_items, is_primitive_predicate=embedding_predicate)
234
+ all_embeddings = cast(Iterable[Item],
235
+ flatten(signal_items, is_primitive_predicate=embedding_predicate))
236
 
237
  embedding_vectors: list[np.ndarray] = []
238
+ all_spans: list[tuple[PathKey, list[tuple[int, int]]]] = []
239
+ for path_key, embeddings in zip(path_keys, all_embeddings):
240
+ if not path_key or not embeddings:
241
  # Sparse embeddings may not have an embedding for every key.
242
  continue
243
 
244
+ spans: list[tuple[int, int]] = []
245
+ for e in embeddings:
246
+ span = e[VALUE_KEY]
247
+ vector = e[EMBEDDING_KEY]
248
+ # We flatten each vector to 1-D here because embedding functions can return outer dimensions of 1.
249
+ embedding_vectors.append(vector.reshape(-1))
250
+ spans.append((span[TEXT_SPAN_START_FEATURE], span[TEXT_SPAN_END_FEATURE]))
251
+ all_spans.append((path_key, spans))
252
 
253
  embedding_matrix = np.array(embedding_vectors)
 
254
  # Write the embedding index and the ordered UUID column to disk so they can be joined later.
255
 
256
  with open_file(output_path_prefix + _EMBEDDINGS_SUFFIX, 'wb') as f:
257
  np.save(cast(str, f), embedding_matrix, allow_pickle=False)
258
+ with open_file(output_path_prefix + _SPANS_SUFFIX, 'wb') as f:
259
+ pickle.dump(all_spans, f)
260
 
261
  return output_path_prefix
262
 
263
 
264
+ def read_embeddings_from_disk(
265
+ filepath_prefix: str) -> tuple[list[tuple[PathKey, list[tuple[int, int]]]], np.ndarray]:
266
+ """Reads the embeddings from disk."""
267
  if not file_exists(filepath_prefix + _EMBEDDINGS_SUFFIX):
268
  raise ValueError(F'Embedding index does not exist at path {filepath_prefix}. '
269
  'Please run dataset.compute_signal() on the embedding signal first.')
 
270
  # Read the embedding index from disk.
271
  embeddings = np.load(filepath_prefix + _EMBEDDINGS_SUFFIX, allow_pickle=False)
272
+ with open_file(filepath_prefix + _SPANS_SUFFIX, 'rb') as f:
273
+ spans: list[tuple[PathKey, list[tuple[int, int]]]] = pickle.load(f)
274
+ return spans, embeddings
275
 
276
 
277
  def write_items_to_parquet(items: Iterable[Item], output_dir: str, schema: Schema,
lilac/embeddings/vector_store.py CHANGED
@@ -1,9 +1,10 @@
1
  """Interface for storing vectors."""
2
 
3
  import abc
4
- from typing import Iterable, Optional
5
 
6
  import numpy as np
 
7
 
8
  from ..schema import VectorKey
9
 
@@ -29,7 +30,7 @@ class VectorStore(abc.ABC):
29
  pass
30
 
31
  @abc.abstractmethod
32
- def get(self, keys: Iterable[VectorKey]) -> np.ndarray:
33
  """Return the embeddings for given keys.
34
 
35
  Args:
@@ -43,15 +44,95 @@ class VectorStore(abc.ABC):
43
  def topk(self,
44
  query: np.ndarray,
45
  k: int,
46
- key_prefixes: Optional[Iterable[VectorKey]] = None) -> list[tuple[VectorKey, float]]:
47
  """Return the top k most similar vectors.
48
 
49
  Args:
50
  query: The query vector.
51
  k: The number of results to return.
52
- key_prefixes: Optional key prefixes to restrict the search to.
53
 
54
  Returns
55
  A list of (key, score) tuples.
56
  """
57
  raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Interface for storing vectors."""
2
 
3
  import abc
4
+ from typing import Iterable, Optional, Type
5
 
6
  import numpy as np
7
+ from typing_extensions import TypedDict
8
 
9
  from ..schema import VectorKey
10
 
 
30
  pass
31
 
32
  @abc.abstractmethod
33
+ def get(self, keys: Optional[Iterable[VectorKey]] = None) -> np.ndarray:
34
  """Return the embeddings for given keys.
35
 
36
  Args:
 
44
  def topk(self,
45
  query: np.ndarray,
46
  k: int,
47
+ keys: Optional[Iterable[VectorKey]] = None) -> list[tuple[VectorKey, float]]:
48
  """Return the top k most similar vectors.
49
 
50
  Args:
51
  query: The query vector.
52
  k: The number of results to return.
53
+ keys: Optional keys to restrict the search to.
54
 
55
  Returns
56
  A list of (key, score) tuples.
57
  """
58
  raise NotImplementedError
59
+
60
+
61
+ class SpanVector(TypedDict):
62
+ """A span with a vector."""
63
+ span: tuple[int, int]
64
+ vector: np.ndarray
65
+
66
+
67
+ PathKey = VectorKey
68
+
69
+
70
+ class VectorDBIndex:
71
+ """Stores and retrieves span vectors.
72
+
73
+ This wraps a regular vector store by adding a mapping from path keys, such as (uuid1, 0),
74
+ to span keys, such as (uuid1, 0, 0), which denotes the first span in the (uuid1, 0) text document.
75
+ """
76
+
77
+ def __init__(self, vector_store_cls: Type[VectorStore],
78
+ spans: list[tuple[PathKey, list[tuple[int, int]]]], embeddings: np.ndarray) -> None:
79
+ vector_keys = [(*path_key, i) for path_key, spans in spans for i in range(len(spans))]
80
+ self._vector_store = vector_store_cls()
81
+ self._vector_store.add(vector_keys, embeddings)
82
+ # Map a path key to spans for that path.
83
+ self._id_to_spans: dict[PathKey, list[tuple[int, int]]] = {}
84
+ self._id_to_spans.update(spans)
85
+
86
+ def get_vector_store(self) -> VectorStore:
87
+ """Return the vector store."""
88
+ return self._vector_store
89
+
90
+ def get(self, keys: Iterable[PathKey]) -> Iterable[list[SpanVector]]:
91
+ """Return the spans with vectors for each key in `keys`.
92
+
93
+ Args:
94
+ keys: The keys to return the vectors for.
95
+
96
+ Returns
97
+ The span vectors for the given keys.
98
+ """
99
+ all_spans: list[list[tuple[int, int]]] = []
100
+ vector_keys: list[VectorKey] = []
101
+ for path_key in keys:
102
+ spans = self._id_to_spans[path_key]
103
+ all_spans.append(spans)
104
+ vector_keys.extend([(*path_key, i) for i in range(len(spans))])
105
+
106
+ all_vectors = self._vector_store.get(vector_keys)
107
+ offset = 0
108
+ for spans in all_spans:
109
+ vectors = all_vectors[offset:offset + len(spans)]
110
+ yield [{'span': span, 'vector': vector} for span, vector in zip(spans, vectors)]
111
+ offset += len(spans)
112
+
113
+ def topk(self,
114
+ query: np.ndarray,
115
+ k: int,
116
+ path_keys: Optional[Iterable[PathKey]] = None) -> list[tuple[PathKey, float]]:
117
+ """Return the top k most similar vectors.
118
+
119
+ Args:
120
+ query: The query vector.
121
+ k: The number of results to return.
122
+ path_keys: Optional key prefixes to restrict the search to.
123
+
124
+ Returns
125
+ A list of (key, score) tuples.
126
+ """
127
+ vector_keys: Optional[list[VectorKey]] = None
128
+ if path_keys:
129
+ vector_keys = [
130
+ (*path_key, i) for path_key in path_keys for i in range(len(self._id_to_spans[path_key]))
131
+ ]
132
+ vector_key_scores = self._vector_store.topk(query, k, vector_keys)
133
+ path_key_scores: dict[PathKey, float] = {}
134
+ for (*path_key_list, _), score in vector_key_scores:
135
+ path_key = tuple(path_key_list)
136
+ if path_key not in path_key_scores:
137
+ path_key_scores[path_key] = score
138
+ return list(path_key_scores.items())
lilac/embeddings/vector_store_numpy.py CHANGED
@@ -34,13 +34,11 @@ class NumpyVectorStore(VectorStore):
34
  # Cast to float32 since dot product with float32 is 40-50x faster than float16 and 2.5x faster
35
  # than float64.
36
  self._embeddings = embeddings.astype(np.float32)
37
-
38
- index = pd.MultiIndex.from_tuples(keys)
39
  row_indices = np.arange(len(self._embeddings), dtype=np.uint32)
40
- self._lookup = pd.Series(row_indices, index=index)
41
 
42
  @override
43
- def get(self, keys: Iterable[VectorKey]) -> np.ndarray:
44
  """Return the embeddings for given keys.
45
 
46
  Args:
@@ -49,6 +47,8 @@ class NumpyVectorStore(VectorStore):
49
  Returns
50
  The embeddings for the given keys.
51
  """
 
 
52
  locs = self._lookup.loc[cast(list[str], keys)]
53
  return self._embeddings.take(locs, axis=0)
54
 
@@ -56,14 +56,10 @@ class NumpyVectorStore(VectorStore):
56
  def topk(self,
57
  query: np.ndarray,
58
  k: int,
59
- key_prefixes: Optional[Iterable[VectorKey]] = None) -> list[tuple[VectorKey, float]]:
60
- if key_prefixes is not None:
61
- # Cast tuples of length 1 to the element itself to avoid a pandas bug.
62
- key_prefixes = cast(
63
- list[VectorKey],
64
- [k[0] if isinstance(k, tuple) and len(k) == 1 else k for k in key_prefixes])
65
  # This uses the hierarchical index (MultiIndex) to do a prefix lookup.
66
- row_indices = self._lookup.loc[cast(list[str], key_prefixes)]
67
  keys, embeddings = list(row_indices.index), self._embeddings.take(row_indices, axis=0)
68
  else:
69
  keys, embeddings = self._keys, self._embeddings
 
34
  # Cast to float32 since dot product with float32 is 40-50x faster than float16 and 2.5x faster
35
  # than float64.
36
  self._embeddings = embeddings.astype(np.float32)
 
 
37
  row_indices = np.arange(len(self._embeddings), dtype=np.uint32)
38
+ self._lookup = pd.Series(row_indices, index=keys)
39
 
40
  @override
41
+ def get(self, keys: Optional[Iterable[VectorKey]] = None) -> np.ndarray:
42
  """Return the embeddings for given keys.
43
 
44
  Args:
 
47
  Returns
48
  The embeddings for the given keys.
49
  """
50
+ if not keys:
51
+ return self._embeddings
52
  locs = self._lookup.loc[cast(list[str], keys)]
53
  return self._embeddings.take(locs, axis=0)
54
 
 
56
  def topk(self,
57
  query: np.ndarray,
58
  k: int,
59
+ keys: Optional[Iterable[VectorKey]] = None) -> list[tuple[VectorKey, float]]:
60
+ if keys is not None:
 
 
 
 
61
  # This uses the hierarchical index (MultiIndex) to do a prefix lookup.
62
+ row_indices = self._lookup.loc[cast(list[str], keys)]
63
  keys, embeddings = list(row_indices.index), self._embeddings.take(row_indices, axis=0)
64
  else:
65
  keys, embeddings = self._keys, self._embeddings
lilac/router_concept.py CHANGED
@@ -125,7 +125,7 @@ class ScoreBody(BaseModel):
125
 
126
  class ScoreResponse(BaseModel):
127
  """Response body for the score endpoint."""
128
- scores: list[dict]
129
  model_synced: bool
130
 
131
 
@@ -212,7 +212,7 @@ def score(namespace: str, concept_name: str, embedding_name: str, body: ScoreBod
212
  model_updated = DISK_CONCEPT_MODEL_DB.sync(model, user)
213
  # TODO(smilkov): Support images.
214
  texts = [example.text or '' for example in body.examples]
215
- return ScoreResponse(scores=model.score(body.draft, texts), model_synced=model_updated)
216
 
217
 
218
  class Examples(OpenAISchema):
 
125
 
126
  class ScoreResponse(BaseModel):
127
  """Response body for the score endpoint."""
128
+ scored_spans: list[list[dict]]
129
  model_synced: bool
130
 
131
 
 
212
  model_updated = DISK_CONCEPT_MODEL_DB.sync(model, user)
213
  # TODO(smilkov): Support images.
214
  texts = [example.text or '' for example in body.examples]
215
+ return ScoreResponse(scored_spans=model.score(body.draft, texts), model_synced=model_updated)
216
 
217
 
218
  class Examples(OpenAISchema):
lilac/router_dataset.py CHANGED
@@ -29,13 +29,7 @@ from .schema import Bin, Path, normalize_path
29
  from .signals.concept_labels import ConceptLabelsSignal
30
  from .signals.concept_scorer import ConceptScoreSignal
31
  from .signals.semantic_similarity import SemanticSimilaritySignal
32
- from .signals.signal import (
33
- Signal,
34
- TextEmbeddingModelSignal,
35
- TextEmbeddingSignal,
36
- TextSignal,
37
- resolve_signal,
38
- )
39
  from .signals.substring_search import SubstringSignal
40
  from .tasks import TaskId, task_manager
41
  from .utils import DatasetInfo, list_datasets
@@ -176,8 +170,7 @@ class ListFilter(BaseModel):
176
  Filter = Union[BinaryFilter, UnaryFilter, ListFilter]
177
 
178
  AllSignalTypes = Union[ConceptScoreSignal, ConceptLabelsSignal, SubstringSignal,
179
- SemanticSimilaritySignal, TextEmbeddingModelSignal, TextEmbeddingSignal,
180
- TextSignal, Signal]
181
 
182
 
183
  # We override the `Column` class so we can add explicitly all signal types for better OpenAPI spec.
 
29
  from .signals.concept_labels import ConceptLabelsSignal
30
  from .signals.concept_scorer import ConceptScoreSignal
31
  from .signals.semantic_similarity import SemanticSimilaritySignal
32
+ from .signals.signal import Signal, TextEmbeddingSignal, TextSignal, resolve_signal
 
 
 
 
 
 
33
  from .signals.substring_search import SubstringSignal
34
  from .tasks import TaskId, task_manager
35
  from .utils import DatasetInfo, list_datasets
 
170
  Filter = Union[BinaryFilter, UnaryFilter, ListFilter]
171
 
172
  AllSignalTypes = Union[ConceptScoreSignal, ConceptLabelsSignal, SubstringSignal,
173
+ SemanticSimilaritySignal, TextEmbeddingSignal, TextSignal, Signal]
 
174
 
175
 
176
  # We override the `Column` class so we can add explicitly all signal types for better OpenAPI spec.
lilac/schema.py CHANGED
@@ -40,6 +40,7 @@ PathKeyedItem = tuple[Path, Item]
40
  # These fields are for Python only and not written to a schema.
41
  RichData = Union[str, bytes]
42
  VectorKey = tuple[Union[StrictStr, StrictInt], ...]
 
43
 
44
 
45
  class DataType(str, Enum):
@@ -94,16 +95,15 @@ class SignalInputType(str, Enum):
94
  return self.value
95
 
96
 
97
- SIGNAL_COMPUTE_TYPE_TO_VALID_DTYPES: dict[SignalInputType, list[DataType]] = {
98
  SignalInputType.TEXT: [DataType.STRING, DataType.STRING_SPAN],
99
- SignalInputType.TEXT_EMBEDDING: [DataType.EMBEDDING],
100
  SignalInputType.IMAGE: [DataType.BINARY],
101
  }
102
 
103
 
104
- def signal_compute_type_supports_dtype(input_type: SignalInputType, dtype: DataType) -> bool:
105
  """Returns True if the signal compute type supports the dtype."""
106
- return dtype in SIGNAL_COMPUTE_TYPE_TO_VALID_DTYPES[input_type]
107
 
108
 
109
  Bin = tuple[str, Optional[Union[float, int]], Optional[Union[float, int]]]
 
40
  # These fields are for Python only and not written to a schema.
41
  RichData = Union[str, bytes]
42
  VectorKey = tuple[Union[StrictStr, StrictInt], ...]
43
+ PathKey = VectorKey
44
 
45
 
46
  class DataType(str, Enum):
 
95
  return self.value
96
 
97
 
98
+ SIGNAL_TYPE_TO_VALID_DTYPES: dict[SignalInputType, list[DataType]] = {
99
  SignalInputType.TEXT: [DataType.STRING, DataType.STRING_SPAN],
 
100
  SignalInputType.IMAGE: [DataType.BINARY],
101
  }
102
 
103
 
104
+ def signal_type_supports_dtype(input_type: SignalInputType, dtype: DataType) -> bool:
105
  """Returns True if the signal compute type supports the dtype."""
106
+ return dtype in SIGNAL_TYPE_TO_VALID_DTYPES[input_type]
107
 
108
 
109
  Bin = tuple[str, Optional[Union[float, int]], Optional[Union[float, int]]]
lilac/server.py CHANGED
@@ -117,7 +117,7 @@ def startup() -> None:
117
 
118
  if repo_id:
119
  # Copy datasets.
120
- spaces_data_dir = os.path.join('data')
121
  datasets = list_datasets(spaces_data_dir)
122
  for dataset in datasets:
123
  spaces_dataset_output_dir = get_dataset_output_dir(spaces_data_dir, dataset.namespace,
 
117
 
118
  if repo_id:
119
  # Copy datasets.
120
+ spaces_data_dir = 'data'
121
  datasets = list_datasets(spaces_data_dir)
122
  for dataset in datasets:
123
  spaces_dataset_output_dir = get_dataset_output_dir(spaces_data_dir, dataset.namespace,
lilac/signals/concept_scorer.py CHANGED
@@ -7,14 +7,17 @@ from typing_extensions import override
7
  from ..auth import UserInfo
8
  from ..concepts.concept import DEFAULT_NUM_NEG_EXAMPLES, DRAFT_MAIN, ConceptColumnInfo, ConceptModel
9
  from ..concepts.db_concept import DISK_CONCEPT_MODEL_DB, ConceptModelDB
10
- from ..embeddings.vector_store import VectorStore
11
- from ..schema import Field, Item, RichData, VectorKey, field
12
- from .signal import TextEmbeddingModelSignal
 
13
 
14
 
15
- class ConceptScoreSignal(TextEmbeddingModelSignal):
16
  """Compute scores along a given concept for documents."""
17
  name = 'concept_score'
 
 
18
  display_name = 'Concept'
19
 
20
  namespace: str
@@ -33,10 +36,16 @@ class ConceptScoreSignal(TextEmbeddingModelSignal):
33
 
34
  @override
35
  def fields(self) -> Field:
36
- return field(
37
- 'float32',
38
- bins=[('Not in concept', None, 0.5), ('In concept', 0.5, None)],
39
- )
 
 
 
 
 
 
40
 
41
  def set_column_info(self, column_info: ConceptColumnInfo) -> None:
42
  """Set the dataset info for this signal."""
@@ -63,26 +72,34 @@ class ConceptScoreSignal(TextEmbeddingModelSignal):
63
  return concept_model.score(self.draft, data)
64
 
65
  @override
66
- def vector_compute(self, keys: Iterable[VectorKey],
67
- vector_store: VectorStore) -> Iterable[Optional[Item]]:
68
  concept_model = self._get_concept_model()
69
- embeddings = vector_store.get(keys)
70
- return concept_model.score_embeddings(self.draft, embeddings).tolist()
 
 
 
 
 
 
 
 
71
 
72
  @override
73
  def vector_compute_topk(
74
  self,
75
  topk: int,
76
- vector_store: VectorStore,
77
- keys: Optional[Iterable[VectorKey]] = None) -> list[tuple[VectorKey, Optional[Item]]]:
78
  concept_model = self._get_concept_model()
79
  query: np.ndarray = concept_model.coef(self.draft)
80
- topk_keys = [key for key, _ in vector_store.topk(query, topk, keys)]
81
- return list(zip(topk_keys, self.vector_compute(topk_keys, vector_store)))
82
 
83
  @override
84
  def key(self, is_computed_signal: Optional[bool] = False) -> str:
85
  # NOTE: The embedding is a value so already exists in the path structure. This means we do not
86
  # need to provide the name as part of the key, which still guarantees uniqueness.
87
  version = f'/v{self._get_concept_model().version}' if is_computed_signal else ''
88
- return f'{self.namespace}/{self.concept_name}{version}'
 
7
  from ..auth import UserInfo
8
  from ..concepts.concept import DEFAULT_NUM_NEG_EXAMPLES, DRAFT_MAIN, ConceptColumnInfo, ConceptModel
9
  from ..concepts.db_concept import DISK_CONCEPT_MODEL_DB, ConceptModelDB
10
+ from ..data.dataset_utils import lilac_span
11
+ from ..embeddings.vector_store import VectorDBIndex
12
+ from ..schema import Field, Item, PathKey, RichData, SignalInputType, field
13
+ from ..signals.signal import VectorSignal
14
 
15
 
16
+ class ConceptScoreSignal(VectorSignal):
17
  """Compute scores along a given concept for documents."""
18
  name = 'concept_score'
19
+ input_type = SignalInputType.TEXT
20
+
21
  display_name = 'Concept'
22
 
23
  namespace: str
 
36
 
37
  @override
38
  def fields(self) -> Field:
39
+ return field(fields=[
40
+ field(
41
+ dtype='string_span',
42
+ fields={
43
+ 'score': field(
44
+ 'float32',
45
+ bins=[('Not in concept', None, 0.5), ('In concept', 0.5, None)],
46
+ )
47
+ })
48
+ ])
49
 
50
  def set_column_info(self, column_info: ConceptColumnInfo) -> None:
51
  """Set the dataset info for this signal."""
 
72
  return concept_model.score(self.draft, data)
73
 
74
  @override
75
+ def vector_compute(self, keys: Iterable[PathKey],
76
+ vector_index: VectorDBIndex) -> Iterable[Optional[Item]]:
77
  concept_model = self._get_concept_model()
78
+ all_vector_spans = vector_index.get(keys)
79
+ # TODO(smilkov): Do this with batched computation.
80
+ for vector_spans in all_vector_spans:
81
+ embeddings = np.array([vector_span['vector'] for vector_span in vector_spans])
82
+ scores = concept_model.score_embeddings(self.draft, embeddings)
83
+ res: Item = []
84
+ for vector_span, score in zip(vector_spans, scores):
85
+ start, end = vector_span['span']
86
+ res.append(lilac_span(start, end, {'score': score}))
87
+ yield res
88
 
89
  @override
90
  def vector_compute_topk(
91
  self,
92
  topk: int,
93
+ vector_index: VectorDBIndex,
94
+ keys: Optional[Iterable[PathKey]] = None) -> list[tuple[PathKey, Optional[Item]]]:
95
  concept_model = self._get_concept_model()
96
  query: np.ndarray = concept_model.coef(self.draft)
97
+ topk_keys = [key for key, _ in vector_index.topk(query, topk, keys)]
98
+ return list(zip(topk_keys, self.vector_compute(topk_keys, vector_index)))
99
 
100
  @override
101
  def key(self, is_computed_signal: Optional[bool] = False) -> str:
102
  # NOTE: The embedding is a value so already exists in the path structure. This means we do not
103
  # need to provide the name as part of the key, which still guarantees uniqueness.
104
  version = f'/v{self._get_concept_model().version}' if is_computed_signal else ''
105
+ return f'{self.namespace}/{self.concept_name}/{self.embedding}{version}'
lilac/signals/lang_detection.py CHANGED
@@ -1,6 +1,6 @@
1
  """Language detection of a document."""
2
  import re
3
- from typing import TYPE_CHECKING, Iterable, Optional, cast
4
 
5
  from pydantic import Field as PydanticField
6
  from typing_extensions import override
@@ -10,9 +10,7 @@ from ..schema import Field, Item, RichData, SignalInputType, field
10
  from .signal import TextSignal
11
 
12
  LANG_CODE = 'lang_code'
13
-
14
- if TYPE_CHECKING:
15
- import langdetect
16
 
17
 
18
  class LangDetectionSignal(TextSignal):
@@ -27,12 +25,18 @@ class LangDetectionSignal(TextSignal):
27
  display_name = 'Language detection'
28
 
29
  input_type = SignalInputType.TEXT
30
- compute_type = SignalInputType.TEXT
31
 
32
  split_by_paragraph: bool = PydanticField(
33
  default=False, description='Compute language scores for each paragraph.')
34
 
35
- _model: Optional['langdetect.detect'] = None
 
 
 
 
 
 
 
36
 
37
  @override
38
  def setup(self) -> None:
@@ -42,7 +46,6 @@ class LangDetectionSignal(TextSignal):
42
  except ImportError:
43
  raise ImportError('Could not import the "langdetect" python package. '
44
  'Please install it with `pip install langdetect`.')
45
- self._model = langdetect.detect
46
 
47
  @override
48
  def fields(self) -> Field:
@@ -52,9 +55,6 @@ class LangDetectionSignal(TextSignal):
52
 
53
  @override
54
  def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
55
- if not self._model:
56
- raise RuntimeError('Language detection model is not initialized.')
57
-
58
  import langdetect
59
  data = cast(Iterable[str], data)
60
  # Split on paragraphs.
@@ -62,10 +62,7 @@ class LangDetectionSignal(TextSignal):
62
 
63
  for text in data:
64
  if not self.split_by_paragraph:
65
- try:
66
- yield self._model(text)
67
- except langdetect.LangDetectException:
68
- yield None
69
  continue
70
 
71
  prev_end = 0
@@ -75,20 +72,16 @@ class LangDetectionSignal(TextSignal):
75
  text_span = text[prev_end:start]
76
  text_span = text_span.strip()
77
  if text_span:
78
- try:
79
- lang_code = self._model(text_span)
80
  result.append(lilac_span(prev_end, start, {LANG_CODE: lang_code}))
81
- except langdetect.LangDetectException:
82
- pass
83
  prev_end = end
84
 
85
  # Process the last chunk.
86
  text_span = text[prev_end:]
87
  if text_span.strip():
88
- try:
89
- lang_code = self._model(text_span)
90
  result.append(lilac_span(prev_end, len(text), {LANG_CODE: lang_code}))
91
- except langdetect.LangDetectException:
92
- pass
93
 
94
  yield result
 
1
  """Language detection of a document."""
2
  import re
3
+ from typing import Any, Iterable, Optional, cast
4
 
5
  from pydantic import Field as PydanticField
6
  from typing_extensions import override
 
10
  from .signal import TextSignal
11
 
12
  LANG_CODE = 'lang_code'
13
+ TEXT_LEN_THRESHOLD = 25
 
 
14
 
15
 
16
  class LangDetectionSignal(TextSignal):
 
25
  display_name = 'Language detection'
26
 
27
  input_type = SignalInputType.TEXT
 
28
 
29
  split_by_paragraph: bool = PydanticField(
30
  default=False, description='Compute language scores for each paragraph.')
31
 
32
+ def _detect(self, text: str, langdetect: Any) -> Optional[str]:
33
+
34
+ if len(text) < TEXT_LEN_THRESHOLD:
35
+ return 'TOO_SHORT'
36
+ try:
37
+ return langdetect.detect(text)
38
+ except langdetect.LangDetectException:
39
+ return None
40
 
41
  @override
42
  def setup(self) -> None:
 
46
  except ImportError:
47
  raise ImportError('Could not import the "langdetect" python package. '
48
  'Please install it with `pip install langdetect`.')
 
49
 
50
  @override
51
  def fields(self) -> Field:
 
55
 
56
  @override
57
  def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
 
 
 
58
  import langdetect
59
  data = cast(Iterable[str], data)
60
  # Split on paragraphs.
 
62
 
63
  for text in data:
64
  if not self.split_by_paragraph:
65
+ yield self._detect(text, langdetect)
 
 
 
66
  continue
67
 
68
  prev_end = 0
 
72
  text_span = text[prev_end:start]
73
  text_span = text_span.strip()
74
  if text_span:
75
+ lang_code = self._detect(text_span, langdetect)
76
+ if lang_code:
77
  result.append(lilac_span(prev_end, start, {LANG_CODE: lang_code}))
 
 
78
  prev_end = end
79
 
80
  # Process the last chunk.
81
  text_span = text[prev_end:]
82
  if text_span.strip():
83
+ lang_code = self._detect(text_span, langdetect)
84
+ if lang_code:
85
  result.append(lilac_span(prev_end, len(text), {LANG_CODE: lang_code}))
 
 
86
 
87
  yield result
lilac/signals/minhash_dup.py CHANGED
@@ -17,7 +17,7 @@ from scipy.integrate import quad as integrate
17
  from tqdm import tqdm
18
 
19
  SEED = 42
20
- NON_ALPHA = re.compile('[^A-Za-z_0-9]')
21
  RNG = np.random.RandomState(SEED)
22
  MAX_HASH = np.uint64((1 << 32) - 1)
23
  MERSENNE_PRIME = np.uint64((1 << 61) - 1)
@@ -72,7 +72,7 @@ def _embed_func(
72
  The hash values in each range and the index.
73
  """
74
  hashvalues = np.ones(num_perm, dtype=np.uint64) * MAX_HASH
75
- tokens = {' '.join(t) for t in _ngrams(NON_ALPHA.split(content), ngram_size, min_ngram_size)}
76
  hv = np.array([_sha1_hash32(token.encode('utf-8')) for token in tokens],
77
  dtype=np.uint64) # noqa: E501
78
  a, b = permutations
 
17
  from tqdm import tqdm
18
 
19
  SEED = 42
20
+ WHITESPACE = re.compile(r'\s+')
21
  RNG = np.random.RandomState(SEED)
22
  MAX_HASH = np.uint64((1 << 32) - 1)
23
  MERSENNE_PRIME = np.uint64((1 << 61) - 1)
 
72
  The hash values in each range and the index.
73
  """
74
  hashvalues = np.ones(num_perm, dtype=np.uint64) * MAX_HASH
75
+ tokens = {' '.join(t) for t in _ngrams(WHITESPACE.split(content), ngram_size, min_ngram_size)}
76
  hv = np.array([_sha1_hash32(token.encode('utf-8')) for token in tokens],
77
  dtype=np.uint64) # noqa: E501
78
  a, b = permutations
lilac/signals/near_dup.py CHANGED
@@ -24,7 +24,6 @@ class NearDuplicateSignal(TextSignal):
24
  display_name = 'Near duplicate documents'
25
 
26
  input_type = SignalInputType.TEXT
27
- compute_type = SignalInputType.TEXT
28
 
29
  threshold: float = PydanticField(
30
  default=0.75,
 
24
  display_name = 'Near duplicate documents'
25
 
26
  input_type = SignalInputType.TEXT
 
27
 
28
  threshold: float = PydanticField(
29
  default=0.75,
lilac/signals/ner.py CHANGED
@@ -23,7 +23,6 @@ class SpacyNER(TextSignal):
23
  model: str = PydanticField(title='SpaCy package name or model path.', default='en_core_web_sm')
24
 
25
  input_type = SignalInputType.TEXT
26
- compute_type = SignalInputType.TEXT
27
 
28
  _nlp: Optional['spacy.language.Language'] = None
29
 
 
23
  model: str = PydanticField(title='SpaCy package name or model path.', default='en_core_web_sm')
24
 
25
  input_type = SignalInputType.TEXT
 
26
 
27
  _nlp: Optional['spacy.language.Language'] = None
28
 
lilac/signals/pii.py CHANGED
@@ -25,7 +25,6 @@ class PIISignal(TextSignal):
25
  display_name = 'Personal Information (PII)'
26
 
27
  input_type = SignalInputType.TEXT
28
- compute_type = SignalInputType.TEXT
29
 
30
  @override
31
  def fields(self) -> Field:
 
25
  display_name = 'Personal Information (PII)'
26
 
27
  input_type = SignalInputType.TEXT
 
28
 
29
  @override
30
  def fields(self) -> Field:
lilac/signals/semantic_similarity.py CHANGED
@@ -5,13 +5,14 @@ import numpy as np
5
  from scipy.interpolate import interp1d
6
  from typing_extensions import override
7
 
 
8
  from ..embeddings.embedding import EmbedFn, get_embed_fn
9
- from ..embeddings.vector_store import VectorStore
10
- from ..schema import Field, Item, RichData, VectorKey, field
11
- from .signal import TextEmbeddingModelSignal
12
 
13
 
14
- class SemanticSimilaritySignal(TextEmbeddingModelSignal):
15
  """Compute semantic similarity for a query and a document.
16
 
17
  \
@@ -20,6 +21,7 @@ class SemanticSimilaritySignal(TextEmbeddingModelSignal):
20
  """
21
  name = 'semantic_similarity'
22
  display_name = 'Semantic Similarity'
 
23
 
24
  query: str
25
 
@@ -32,15 +34,13 @@ class SemanticSimilaritySignal(TextEmbeddingModelSignal):
32
  def __init__(self, query: Union[str, bytes], embedding: str, **kwargs: Any):
33
  if isinstance(query, bytes):
34
  raise ValueError('Image queries are not yet supported for SemanticSimilarity.')
35
-
36
- super().__init__(query=query, embedding=embedding, **kwargs)
37
-
38
  # TODO(nsthorat): The embedding cls might have arguments. This needs to be resolved.
39
  self._embed_fn = get_embed_fn(embedding)
40
 
41
  @override
42
  def fields(self) -> Field:
43
- return field('float32')
44
 
45
  def _get_search_embedding(self) -> np.ndarray:
46
  """Return the embedding for the search text."""
@@ -51,26 +51,32 @@ class SemanticSimilaritySignal(TextEmbeddingModelSignal):
51
 
52
  @override
53
  def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
54
- text_embeddings = self._embed_fn(data)
55
- similarities = text_embeddings.dot(self._get_search_embedding()).reshape(-1)
56
- return similarities.tolist()
 
57
 
58
  @override
59
- def vector_compute(self, keys: Iterable[VectorKey],
60
- vector_store: VectorStore) -> Iterable[Optional[Item]]:
61
- text_embeddings = vector_store.get(keys)
62
- similarities = text_embeddings.dot(self._get_search_embedding()).reshape(-1)
63
- # Clip the similarities since float precision can cause these to be barely outside the range and
64
- # throw an exception with interp1d.
65
- similarities = np.clip(similarities, -1., 1.)
66
- return self._interpolate_fn(similarities).tolist()
 
 
 
 
 
67
 
68
  @override
69
  def vector_compute_topk(
70
  self,
71
  topk: int,
72
- vector_store: VectorStore,
73
- keys: Optional[Iterable[VectorKey]] = None) -> list[tuple[VectorKey, Optional[Item]]]:
74
  query = self._get_search_embedding()
75
- topk_keys = [key for key, _ in vector_store.topk(query, topk, keys)]
76
- return list(zip(topk_keys, self.vector_compute(topk_keys, vector_store)))
 
5
  from scipy.interpolate import interp1d
6
  from typing_extensions import override
7
 
8
+ from ..data.dataset_utils import lilac_span
9
  from ..embeddings.embedding import EmbedFn, get_embed_fn
10
+ from ..embeddings.vector_store import VectorDBIndex
11
+ from ..schema import Field, Item, PathKey, RichData, SignalInputType, field
12
+ from .signal import VectorSignal
13
 
14
 
15
+ class SemanticSimilaritySignal(VectorSignal):
16
  """Compute semantic similarity for a query and a document.
17
 
18
  \
 
21
  """
22
  name = 'semantic_similarity'
23
  display_name = 'Semantic Similarity'
24
+ input_type = SignalInputType.TEXT
25
 
26
  query: str
27
 
 
34
  def __init__(self, query: Union[str, bytes], embedding: str, **kwargs: Any):
35
  if isinstance(query, bytes):
36
  raise ValueError('Image queries are not yet supported for SemanticSimilarity.')
37
+ super().__init__(query=query, embedding=embedding, **kwargs) # type: ignore
 
 
38
  # TODO(nsthorat): The embedding cls might have arguments. This needs to be resolved.
39
  self._embed_fn = get_embed_fn(embedding)
40
 
41
  @override
42
  def fields(self) -> Field:
43
+ return field(fields=[field(dtype='string_span', fields={'score': 'float32'})])
44
 
45
  def _get_search_embedding(self) -> np.ndarray:
46
  """Return the embedding for the search text."""
 
51
 
52
  @override
53
  def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
54
+ embeddings = self._embed_fn(data)
55
+ scores = embeddings.dot(self._get_search_embedding()).reshape(-1)
56
+ for text, score in zip(data, scores):
57
+ yield [lilac_span(0, len(text), {'score': score})]
58
 
59
  @override
60
+ def vector_compute(self, keys: Iterable[PathKey],
61
+ vector_index: VectorDBIndex) -> Iterable[Optional[Item]]:
62
+ all_vector_spans = vector_index.get(keys)
63
+ query = self._get_search_embedding()
64
+ # TODO(smilkov): Do this with batched computation.
65
+ for vector_spans in all_vector_spans:
66
+ embeddings = np.array([vector_span['vector'] for vector_span in vector_spans])
67
+ scores = embeddings.dot(query).reshape(-1)
68
+ res: Item = []
69
+ for vector_span, score in zip(vector_spans, scores):
70
+ start, end = vector_span['span']
71
+ res.append(lilac_span(start, end, {'score': score}))
72
+ yield res
73
 
74
  @override
75
  def vector_compute_topk(
76
  self,
77
  topk: int,
78
+ vector_index: VectorDBIndex,
79
+ keys: Optional[Iterable[PathKey]] = None) -> list[tuple[PathKey, Optional[Item]]]:
80
  query = self._get_search_embedding()
81
+ topk_keys = [key for key, _ in vector_index.topk(query, topk, keys)]
82
+ return list(zip(topk_keys, self.vector_compute(topk_keys, vector_index)))
lilac/signals/signal.py CHANGED
@@ -6,8 +6,8 @@ from typing import Any, ClassVar, Iterable, Optional, Sequence, Type, TypeVar, U
6
  from pydantic import BaseModel, Extra, validator
7
  from typing_extensions import override
8
 
9
- from ..embeddings.vector_store import VectorStore
10
- from ..schema import Field, Item, RichData, SignalInputType, VectorKey, field
11
 
12
  EMBEDDING_KEY = 'embedding'
13
 
@@ -19,12 +19,8 @@ class Signal(abc.ABC, BaseModel):
19
  # The display name is just used for rendering in the UI.
20
  display_name: ClassVar[Optional[str]]
21
 
22
- # The input type is used to populate the UI for signals that require other signals. For example,
23
- # if a signal is an TextEmbeddingModelSignal, it computes over embeddings, but it's input type
24
- # will be text.
25
  input_type: ClassVar[SignalInputType]
26
- # The compute type defines what should be passed to compute().
27
- compute_type: ClassVar[SignalInputType]
28
 
29
  # The signal_name will get populated in init automatically from the class name so it gets
30
  # serialized and the signal author doesn't have to define both the static property and the field.
@@ -79,40 +75,6 @@ class Signal(abc.ABC, BaseModel):
79
  """
80
  raise NotImplementedError
81
 
82
- def vector_compute(self, keys: Iterable[VectorKey],
83
- vector_store: VectorStore) -> Iterable[Optional[Item]]:
84
- """Compute the signal for an iterable of keys that point to documents or images.
85
-
86
- Args:
87
- keys: An iterable of value ids (at row-level or lower) to lookup precomputed embeddings.
88
- vector_store: The vector store to lookup pre-computed embeddings.
89
-
90
- Returns
91
- An iterable of items. Sparse signals should return "None" for skipped inputs.
92
- """
93
- raise NotImplementedError
94
-
95
- def vector_compute_topk(
96
- self,
97
- topk: int,
98
- vector_store: VectorStore,
99
- keys: Optional[Iterable[VectorKey]] = None) -> Sequence[tuple[VectorKey, Optional[Item]]]:
100
- """Return signal results only for the top k documents or images.
101
-
102
- Signals decide how to rank each document/image in the dataset, usually by a similarity score
103
- obtained via the vector store.
104
-
105
- Args:
106
- topk: The number of items to return, ranked by the signal.
107
- vector_store: The vector store to lookup pre-computed embeddings.
108
- keys: Optional iterable of row ids to restrict the search to.
109
-
110
- Returns
111
- A list of (key, signal_output) tuples containing the `topk` items. Sparse signals should
112
- return "None" for skipped inputs.
113
- """
114
- raise NotImplementedError
115
-
116
  def key(self, is_computed_signal: Optional[bool] = False) -> str:
117
  """Get the key for a signal.
118
 
@@ -155,7 +117,6 @@ def _args_key_from_dict(args_dict: dict[str, Any]) -> str:
155
  class TextSplitterSignal(Signal):
156
  """An interface for signals that compute over text."""
157
  input_type = SignalInputType.TEXT
158
- compute_type = SignalInputType.TEXT
159
 
160
  @override
161
  def fields(self) -> Field:
@@ -166,7 +127,6 @@ class TextSplitterSignal(Signal):
166
  class TextSignal(Signal):
167
  """An interface for signals that compute over text."""
168
  input_type = SignalInputType.TEXT
169
- compute_type = SignalInputType.TEXT
170
 
171
  @override
172
  def key(self, is_computed_signal: Optional[bool] = False) -> str:
@@ -179,7 +139,6 @@ class TextSignal(Signal):
179
  class TextEmbeddingSignal(TextSignal):
180
  """An interface for signals that compute embeddings for text."""
181
  input_type = SignalInputType.TEXT
182
- compute_type = SignalInputType.TEXT
183
 
184
  _split = True
185
 
@@ -196,38 +155,43 @@ class TextEmbeddingSignal(TextSignal):
196
  return field(fields=[field('string_span', fields={EMBEDDING_KEY: 'embedding'})])
197
 
198
 
199
- class TextEmbeddingModelSignal(TextSignal):
200
- """An interface for signals that take embeddings and produce items."""
201
- input_type = SignalInputType.TEXT
202
- # compute() takes embeddings, while it operates over text fields by transitively computing splits
203
- # and embeddings.
204
- compute_type = SignalInputType.TEXT_EMBEDDING
205
-
206
  embedding: str
207
- _embedding_signal: Optional[TextEmbeddingSignal] = None
208
 
209
- def __init__(self, **kwargs: Any):
210
- super().__init__(**kwargs)
 
211
 
212
- # Validate the embedding signal is registered and the correct type.
213
- # TODO(nsthorat): Allow arguments passed to the embedding signal.
214
- self._embedding_signal = get_signal_by_type(self.embedding, TextEmbeddingSignal)()
215
 
216
- def get_embedding_signal(self) -> TextEmbeddingSignal:
217
- """Return the embedding signal."""
218
- assert self._embedding_signal is not None
219
- return self._embedding_signal
220
 
221
- @override
222
- def key(self, is_computed_signal: Optional[bool] = False) -> str:
223
- # NOTE: The embedding and split already exists in the path structure. This means we do not
224
- # need to provide the signal names as part of the key, which still guarantees uniqueness.
 
 
225
 
226
- args_dict = self.dict(exclude_unset=True)
227
- if 'signal_name' in args_dict:
228
- del args_dict['signal_name']
229
- del args_dict['embedding']
230
- return self.name + _args_key_from_dict(args_dict)
 
 
 
 
 
 
 
 
231
 
232
 
233
  Tsignal = TypeVar('Tsignal', bound=Signal)
 
6
  from pydantic import BaseModel, Extra, validator
7
  from typing_extensions import override
8
 
9
+ from ..embeddings.vector_store import VectorDBIndex
10
+ from ..schema import Field, Item, PathKey, RichData, SignalInputType, field
11
 
12
  EMBEDDING_KEY = 'embedding'
13
 
 
19
  # The display name is just used for rendering in the UI.
20
  display_name: ClassVar[Optional[str]]
21
 
22
+ # The input type is used to populate the UI to determine what the signal accepts as input.
 
 
23
  input_type: ClassVar[SignalInputType]
 
 
24
 
25
  # The signal_name will get populated in init automatically from the class name so it gets
26
  # serialized and the signal author doesn't have to define both the static property and the field.
 
75
  """
76
  raise NotImplementedError
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def key(self, is_computed_signal: Optional[bool] = False) -> str:
79
  """Get the key for a signal.
80
 
 
117
  class TextSplitterSignal(Signal):
118
  """An interface for signals that compute over text."""
119
  input_type = SignalInputType.TEXT
 
120
 
121
  @override
122
  def fields(self) -> Field:
 
127
  class TextSignal(Signal):
128
  """An interface for signals that compute over text."""
129
  input_type = SignalInputType.TEXT
 
130
 
131
  @override
132
  def key(self, is_computed_signal: Optional[bool] = False) -> str:
 
139
  class TextEmbeddingSignal(TextSignal):
140
  """An interface for signals that compute embeddings for text."""
141
  input_type = SignalInputType.TEXT
 
142
 
143
  _split = True
144
 
 
155
  return field(fields=[field('string_span', fields={EMBEDDING_KEY: 'embedding'})])
156
 
157
 
158
+ class VectorSignal(Signal, abc.ABC):
159
+ """An interface for signals that can compute items given vector inputs."""
 
 
 
 
 
160
  embedding: str
 
161
 
162
+ def vector_compute(self, keys: Iterable[PathKey],
163
+ vector_index: VectorDBIndex) -> Iterable[Optional[Item]]:
164
+ """Compute the signal for an iterable of keys that point to documents or images.
165
 
166
+ Args:
167
+ keys: An iterable of value ids (at row-level or lower) to lookup precomputed embeddings.
168
+ vector_index: The vector index to lookup pre-computed embeddings.
169
 
170
+ Returns
171
+ An iterable of items. Sparse signals should return "None" for skipped inputs.
172
+ """
173
+ raise NotImplementedError
174
 
175
+ def vector_compute_topk(
176
+ self,
177
+ topk: int,
178
+ vector_index: VectorDBIndex,
179
+ keys: Optional[Iterable[PathKey]] = None) -> Sequence[tuple[PathKey, Optional[Item]]]:
180
+ """Return signal results only for the top k documents or images.
181
 
182
+ Signals decide how to rank each document/image in the dataset, usually by a similarity score
183
+ obtained via the vector store.
184
+
185
+ Args:
186
+ topk: The number of items to return, ranked by the signal.
187
+ vector_index: The vector index to lookup pre-computed embeddings.
188
+ keys: Optional iterable of row ids to restrict the search to.
189
+
190
+ Returns
191
+ A list of (key, signal_output) tuples containing the `topk` items. Sparse signals should
192
+ return "None" for skipped inputs.
193
+ """
194
+ raise NotImplementedError
195
 
196
 
197
  Tsignal = TypeVar('Tsignal', bound=Signal)
lilac/signals/splitters/chunk_splitter.py CHANGED
@@ -41,7 +41,7 @@ from ..signal import TextSplitterSignal
41
 
42
  TextChunk = tuple[str, tuple[int, int]]
43
 
44
- DEFAULT_SEPARATORS = ['\n\n', '\n', ' ', '']
45
  CHUNK_SIZE = 400
46
  CHUNK_OVERLAP = 50
47
 
@@ -99,10 +99,24 @@ def _sep_split(text: str, separator: str) -> list[TextChunk]:
99
 
100
  offset = 0
101
  chunks: list[TextChunk] = []
 
102
  end_index = text.find(separator, offset)
103
 
104
  while end_index >= 0:
105
- chunks.append((text[offset:end_index], (offset, end_index)))
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  offset = end_index + len(separator)
107
  end_index = text.find(separator, offset)
108
 
@@ -120,9 +134,8 @@ def split_text(text: str,
120
  """Split incoming text and return chunks."""
121
 
122
  def _merge_splits(splits: Iterable[TextChunk], separator: str) -> list[TextChunk]:
123
- # We now want to combine these smaller pieces into medium size
124
- # chunks to send to the LLM.
125
- separator_len = length_function(separator)
126
 
127
  docs: list[TextChunk] = []
128
  current_doc: list[TextChunk] = []
@@ -157,12 +170,14 @@ def split_text(text: str,
157
  final_chunks: list[TextChunk] = []
158
  # Get appropriate separator to use
159
  separator = separators[-1]
160
- for _s in separators:
 
161
  if _s == '':
162
  separator = _s
163
  break
164
  if _s in text:
165
  separator = _s
 
166
  break
167
  # Now that we have the separator, split the text.
168
  splits = _sep_split(text, separator)
@@ -177,7 +192,8 @@ def split_text(text: str,
177
  merged_text = _merge_splits(good_splits, separator)
178
  final_chunks.extend(merged_text)
179
  good_splits = []
180
- other_chunks = split_text(text_chunk, chunk_size, chunk_overlap, separators, length_function)
 
181
  # Adjust the offsets of the other chunks.
182
  other_chunks = [(t, (s + start, e + start)) for t, (s, e) in other_chunks]
183
  final_chunks.extend(other_chunks)
@@ -188,6 +204,9 @@ def split_text(text: str,
188
 
189
 
190
  def _join_chunks(chunks: list[TextChunk], separator: str) -> Optional[TextChunk]:
 
 
 
191
  text = separator.join([text for text, _ in chunks])
192
  text = text.strip()
193
  if text == '':
 
41
 
42
  TextChunk = tuple[str, tuple[int, int]]
43
 
44
+ DEFAULT_SEPARATORS = ['```', '\n\n', '\n', ' ', '']
45
  CHUNK_SIZE = 400
46
  CHUNK_OVERLAP = 50
47
 
 
99
 
100
  offset = 0
101
  chunks: list[TextChunk] = []
102
+ open_code_block = False
103
  end_index = text.find(separator, offset)
104
 
105
  while end_index >= 0:
106
+ if separator == '```':
107
+ # We want to keep the code block separators as part of the text chunk.
108
+ start = max(0, offset - len(separator))
109
+ if open_code_block:
110
+ end = end_index + len(separator)
111
+ open_code_block = False
112
+ else:
113
+ end = end_index
114
+ open_code_block = True
115
+ else:
116
+ start = offset
117
+ end = end_index
118
+
119
+ chunks.append((text[start:end], (start, end)))
120
  offset = end_index + len(separator)
121
  end_index = text.find(separator, offset)
122
 
 
134
  """Split incoming text and return chunks."""
135
 
136
  def _merge_splits(splits: Iterable[TextChunk], separator: str) -> list[TextChunk]:
137
+ # We now want to combine these smaller pieces into medium size chunks to send to the LLM.
138
+ separator_len = 0 if separator == '```' else length_function(separator)
 
139
 
140
  docs: list[TextChunk] = []
141
  current_doc: list[TextChunk] = []
 
170
  final_chunks: list[TextChunk] = []
171
  # Get appropriate separator to use
172
  separator = separators[-1]
173
+ new_separators: list[str] = []
174
+ for i, _s in enumerate(separators):
175
  if _s == '':
176
  separator = _s
177
  break
178
  if _s in text:
179
  separator = _s
180
+ new_separators = separators[i + 1:]
181
  break
182
  # Now that we have the separator, split the text.
183
  splits = _sep_split(text, separator)
 
192
  merged_text = _merge_splits(good_splits, separator)
193
  final_chunks.extend(merged_text)
194
  good_splits = []
195
+ other_chunks = split_text(text_chunk, chunk_size, chunk_overlap, new_separators,
196
+ length_function)
197
  # Adjust the offsets of the other chunks.
198
  other_chunks = [(t, (s + start, e + start)) for t, (s, e) in other_chunks]
199
  final_chunks.extend(other_chunks)
 
204
 
205
 
206
  def _join_chunks(chunks: list[TextChunk], separator: str) -> Optional[TextChunk]:
207
+ if separator == '```':
208
+ # Code blocks already have the separator.
209
+ separator = ''
210
  text = separator.join([text for text, _ in chunks])
211
  text = text.strip()
212
  if text == '':
lilac/signals/substring_search.py CHANGED
@@ -14,7 +14,6 @@ class SubstringSignal(Signal):
14
  name = 'substring_search'
15
  display_name = 'Substring Search'
16
  input_type = SignalInputType.TEXT
17
- compute_type = SignalInputType.TEXT
18
 
19
  query: str
20
 
 
14
  name = 'substring_search'
15
  display_name = 'Substring Search'
16
  input_type = SignalInputType.TEXT
 
17
 
18
  query: str
19
 
lilac/signals/text_statistics.py CHANGED
@@ -13,6 +13,7 @@ SPACY_BATCH_SIZE = 128
13
  NUM_CHARS = 'num_characters'
14
  READABILITY = 'readability'
15
  TYPE_TOKEN_RATIO = 'log(type_token_ratio)'
 
16
 
17
  if TYPE_CHECKING:
18
  from spacy import Language
@@ -28,11 +29,14 @@ class TextStatisticsSignal(TextSignal):
28
 
29
  @override
30
  def fields(self) -> Field:
31
- return field(fields={
32
- NUM_CHARS: 'int32',
33
- READABILITY: 'float32',
34
- TYPE_TOKEN_RATIO: 'float32',
35
- })
 
 
 
36
 
37
  @override
38
  def setup(self) -> None:
@@ -71,15 +75,27 @@ class TextStatisticsSignal(TextSignal):
71
  # available statistics.
72
  corpus = textacy.corpus.Corpus(lang=self._lang, data=batch)
73
  for doc in cast(Iterable['Doc'], corpus):
74
- if not len(doc):
75
  yield None
76
  continue
77
- readability = text_stats.readability.automated_readability_index(doc)
78
- ttr = text_stats.diversity.log_ttr(doc)
79
- num_chars = text_stats.basics.n_chars(doc)
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  yield {
82
  NUM_CHARS: num_chars,
83
  READABILITY: readability,
84
  TYPE_TOKEN_RATIO: ttr,
 
85
  }
 
13
  NUM_CHARS = 'num_characters'
14
  READABILITY = 'readability'
15
  TYPE_TOKEN_RATIO = 'log(type_token_ratio)'
16
+ FRAC_NON_ASCII = 'frac_non_ascii'
17
 
18
  if TYPE_CHECKING:
19
  from spacy import Language
 
29
 
30
  @override
31
  def fields(self) -> Field:
32
+ return field(
33
+ fields={
34
+ NUM_CHARS: 'int32',
35
+ READABILITY: 'float32',
36
+ TYPE_TOKEN_RATIO: 'float32',
37
+ FRAC_NON_ASCII: field(
38
+ 'float32', bins=[('Low', None, 0.15), ('Medium', 0.15, 0.3), ('High', 0.3, None)])
39
+ })
40
 
41
  @override
42
  def setup(self) -> None:
 
75
  # available statistics.
76
  corpus = textacy.corpus.Corpus(lang=self._lang, data=batch)
77
  for doc in cast(Iterable['Doc'], corpus):
78
+ if not doc or not doc.text.strip():
79
  yield None
80
  continue
81
+ try:
82
+ readability = text_stats.readability.automated_readability_index(doc)
83
+ except ZeroDivisionError:
84
+ readability = None
85
+ try:
86
+ ttr = text_stats.diversity.log_ttr(doc)
87
+ except ValueError:
88
+ ttr = None
89
+ num_chars = len(doc.text)
90
+ num_non_ascii = 0
91
+ for c in doc.text:
92
+ if ord(c) >= 128:
93
+ num_non_ascii += 1
94
+ frac_non_ascii = num_non_ascii / num_chars if num_chars else 0
95
 
96
  yield {
97
  NUM_CHARS: num_chars,
98
  READABILITY: readability,
99
  TYPE_TOKEN_RATIO: ttr,
100
+ FRAC_NON_ASCII: frac_non_ascii
101
  }
lilac/web/_app/immutable/assets/0.d7803630.css ADDED
The diff for this file is too large to render. See raw diff
 
lilac/web/_app/immutable/assets/ConceptView.98f1ad48.css ADDED
@@ -0,0 +1 @@
 
 
1
+ button.svelte-d3v0kx{width:100%;padding:.5rem 1rem;text-align:left;--tw-text-opacity:1;color:rgb(31 41 55 / var(--tw-text-opacity))}button.svelte-d3v0kx:hover{--tw-bg-opacity:1;background-color:rgb(229 231 235 / var(--tw-bg-opacity));--tw-text-opacity:1;color:rgb(0 0 0 / var(--tw-text-opacity))}button[data-active=true].svelte-d3v0kx{--tw-bg-opacity:1;background-color:rgb(209 213 219 / var(--tw-bg-opacity));--tw-text-opacity:1;color:rgb(0 0 0 / var(--tw-text-opacity))}.bx--tag{margin:0}.concept-score-pill .bx--tooltip__label{margin-right:.25rem;display:inline-block;height:100%;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;max-width:5rem}.concept-score-pill .bx--tooltip__content{display:flex;flex-direction:column;align-items:center}table.svelte-zc5pc5 td.svelte-zc5pc5{padding:.25rem .5rem}.embedding-badge-nolabel .bx--tooltip__label .bx--tooltip__trigger{margin:0}.embedding-badge-nolabel .bx--tag__custom-icon{margin-right:0}.more-button .bx--btn{height:1.5rem;width:12rem}.named-value-name.svelte-1689hje{max-width:15rem}.highlight-span.svelte-8ox5pu{padding-top:1.5px;padding-bottom:1.5px}.highlight-span pre{--tw-bg-opacity:1;background-color:rgb(226 232 240 / var(--tw-bg-opacity));font-size:.875rem;line-height:1.25rem}.highlight-span p,.highlight-span pre{margin-top:.75rem;margin-bottom:.75rem}.highlight-span p:first-child{display:inline!important}.highlight-span p:last-child{display:inline!important}.highlight-span p,.highlight-span h1{background-color:inherit}.highlight-span p{font-size:.875rem;line-height:1.25rem;font-weight:inherit}.dataset-link.bx--btn{min-height:0px}