nsthorat committed on
Commit
1645d4b
1 Parent(s): 9af2f81
.env CHANGED
@@ -1,7 +1,9 @@
1
  # To overwrite these variables, create a .env.local file
2
 
3
  # The path to the project directory. When used, this will be the global project directory for lilac.
4
- # When not defined, define the project path with `lilac start ./data`.
 
 
5
  # LILAC_DATA_PATH=./data
6
 
7
  # Set to 1 for duckdb to use views instead of materialized tables (lower memory usage, but slower).
@@ -43,3 +45,6 @@ GOOGLE_CLIENT_ID='279475920249-i8llm8vbos1vj5m1qocir8narb3r0enu.apps.googleuserc
43
  # LangSmith source setup.
44
  # LANGCHAIN_API_KEY=
45
  LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
 
 
 
 
1
  # To overwrite these variables, create a .env.local file
2
 
3
  # The path to the project directory. When used, this will be the global project directory for lilac.
4
+ # When not defined, define the project directory with `lilac start ./data`.
5
+ # LILAC_PROJECT_DIR=./data
6
+ # NOTE: LILAC_DATA_PATH (below) is deprecated in favor of LILAC_PROJECT_DIR.
7
  # LILAC_DATA_PATH=./data
8
 
9
  # Set to 1 for duckdb to use views instead of materialized tables (lower memory usage, but slower).
 
45
  # LangSmith source setup.
46
  # LANGCHAIN_API_KEY=
47
  LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
48
+
49
+ # Firebase deployment token.
50
+ # FIREBASE_TOKEN=
.env.demo CHANGED
@@ -1,4 +1,4 @@
1
- LILAC_DATA_PATH='/data'
2
  HF_HOME=/data/.huggingface
3
  TRANSFORMERS_CACHE=/data/.cache
4
  XDG_CACHE_HOME=/data/.cache
 
1
+ LILAC_PROJECT_DIR='/data'
2
  HF_HOME=/data/.huggingface
3
  TRANSFORMERS_CACHE=/data/.cache
4
  XDG_CACHE_HOME=/data/.cache
.gitattributes DELETED
@@ -1,3 +0,0 @@
1
- dist/lilac-0.0.19-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
2
- data/.cache/lilac/concept/lilac/profanity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
3
- data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl DELETED
Binary file (10.8 kB)
 
data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl DELETED
Binary file (21.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl DELETED
Binary file (21.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl DELETED
Binary file (21.8 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl DELETED
Binary file (60.6 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl DELETED
Binary file (202 kB)
 
data/.cache/lilac/concept/lilac/non-english/gte-small.pkl DELETED
Binary file (331 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl DELETED
Binary file (180 kB)
 
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed7340614b1dea910ddeb26bbda0167b1f4fe2479071a62a70b63c18bc6232d0
3
- size 1672960
 
 
 
 
data/.cache/lilac/concept/lilac/question/gte-small.pkl DELETED
Binary file (611 kB)
 
data/.cache/lilac/concept/lilac/source-code/gte-small.pkl DELETED
Binary file (147 kB)
 
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2af2736f3d749391a431f9c24d3fc78cf8e58457cc4f0d1ce770185b92d879c
3
- size 1886446
 
 
 
 
data/.cache/lilac/concept/local/aliens/gte-small.pkl DELETED
Binary file (28.5 kB)
 
data/lilac.yml CHANGED
@@ -258,3 +258,168 @@ datasets:
258
  ui:
259
  media_paths:
260
  - premise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  ui:
259
  media_paths:
260
  - premise
261
+ - namespace: lilac
262
+ name: mmlu_professional_law
263
+ source:
264
+ dataset_name: cais/mmlu
265
+ config_name: professional_law
266
+ source_name: huggingface
267
+ embeddings:
268
+ - path: question
269
+ embedding: gte-small
270
+ - path:
271
+ - choices
272
+ - '*'
273
+ embedding: gte-small
274
+ signals:
275
+ - path: question
276
+ signal:
277
+ signal_name: near_dup
278
+ - path: question
279
+ signal:
280
+ signal_name: pii
281
+ - path: question
282
+ signal:
283
+ signal_name: lang_detection
284
+ - path: question
285
+ signal:
286
+ embedding: gte-small
287
+ namespace: lilac
288
+ concept_name: positive-sentiment
289
+ signal_name: concept_score
290
+ - path: question
291
+ signal:
292
+ embedding: gte-small
293
+ namespace: lilac
294
+ concept_name: non-english
295
+ signal_name: concept_score
296
+ - path: question
297
+ signal:
298
+ embedding: gte-small
299
+ namespace: lilac
300
+ concept_name: toxicity
301
+ signal_name: concept_score
302
+ - path: question
303
+ signal:
304
+ embedding: gte-small
305
+ namespace: lilac
306
+ concept_name: question
307
+ signal_name: concept_score
308
+ - path: question
309
+ signal:
310
+ embedding: gte-small
311
+ namespace: lilac
312
+ concept_name: legal-termination
313
+ signal_name: concept_score
314
+ - path: question
315
+ signal:
316
+ embedding: gte-small
317
+ namespace: lilac
318
+ concept_name: source-code
319
+ signal_name: concept_score
320
+ - path: question
321
+ signal:
322
+ embedding: gte-small
323
+ namespace: lilac
324
+ concept_name: negative-sentiment
325
+ signal_name: concept_score
326
+ - path: question
327
+ signal:
328
+ embedding: gte-small
329
+ namespace: lilac
330
+ concept_name: profanity
331
+ signal_name: concept_score
332
+ - path: question
333
+ signal:
334
+ signal_name: text_statistics
335
+ - path:
336
+ - choices
337
+ - '*'
338
+ signal:
339
+ signal_name: near_dup
340
+ - path:
341
+ - choices
342
+ - '*'
343
+ signal:
344
+ signal_name: pii
345
+ - path:
346
+ - choices
347
+ - '*'
348
+ signal:
349
+ signal_name: lang_detection
350
+ - path:
351
+ - choices
352
+ - '*'
353
+ signal:
354
+ embedding: gte-small
355
+ namespace: lilac
356
+ concept_name: positive-sentiment
357
+ signal_name: concept_score
358
+ - path:
359
+ - choices
360
+ - '*'
361
+ signal:
362
+ embedding: gte-small
363
+ namespace: lilac
364
+ concept_name: non-english
365
+ signal_name: concept_score
366
+ - path:
367
+ - choices
368
+ - '*'
369
+ signal:
370
+ embedding: gte-small
371
+ namespace: lilac
372
+ concept_name: toxicity
373
+ signal_name: concept_score
374
+ - path:
375
+ - choices
376
+ - '*'
377
+ signal:
378
+ embedding: gte-small
379
+ namespace: lilac
380
+ concept_name: question
381
+ signal_name: concept_score
382
+ - path:
383
+ - choices
384
+ - '*'
385
+ signal:
386
+ embedding: gte-small
387
+ namespace: lilac
388
+ concept_name: legal-termination
389
+ signal_name: concept_score
390
+ - path:
391
+ - choices
392
+ - '*'
393
+ signal:
394
+ embedding: gte-small
395
+ namespace: lilac
396
+ concept_name: source-code
397
+ signal_name: concept_score
398
+ - path:
399
+ - choices
400
+ - '*'
401
+ signal:
402
+ embedding: gte-small
403
+ namespace: lilac
404
+ concept_name: negative-sentiment
405
+ signal_name: concept_score
406
+ - path:
407
+ - choices
408
+ - '*'
409
+ signal:
410
+ embedding: gte-small
411
+ namespace: lilac
412
+ concept_name: profanity
413
+ signal_name: concept_score
414
+ - path:
415
+ - choices
416
+ - '*'
417
+ signal:
418
+ signal_name: text_statistics
419
+ settings:
420
+ ui:
421
+ media_paths:
422
+ - question
423
+ - - choices
424
+ - '*'
425
+ preferred_embedding: gte-small
dist/README.md DELETED
@@ -1,2 +0,0 @@
1
- This directory is used for locally built whl files.
2
- We write a README.md to ensure an empty folder is uploaded when there is no whl.
 
 
 
dist/lilac-0.0.19-py3-none-any.whl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09c9663b9e8ad2e05eea12ecf1324ad990c0e410454b5ea35afd687c33fff60f
3
- size 1142074
 
 
 
 
docker_start.py CHANGED
@@ -8,7 +8,7 @@ import yaml
8
  from huggingface_hub import scan_cache_dir, snapshot_download
9
 
10
  from lilac.concepts.db_concept import CONCEPTS_DIR, DiskConceptDB, get_concept_output_dir
11
- from lilac.env import data_path, env
12
  from lilac.project import PROJECT_CONFIG_FILENAME
13
  from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
14
 
@@ -67,7 +67,7 @@ def main() -> None:
67
  repo_id=lilac_hf_dataset,
68
  repo_type='dataset',
69
  token=env('HF_ACCESS_TOKEN'),
70
- local_dir=get_datasets_dir(data_path()),
71
  ignore_patterns=['.gitattributes', 'README.md'])
72
 
73
  snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
@@ -76,16 +76,16 @@ def main() -> None:
76
  # Copy the config file.
77
  project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
78
  if os.path.exists(project_config_file):
79
- shutil.copy(project_config_file, os.path.join(data_path(), PROJECT_CONFIG_FILENAME))
80
 
81
  # Delete cache files from persistent storage.
82
- cache_dir = get_lilac_cache_dir(data_path())
83
  if os.path.exists(cache_dir):
84
  shutil.rmtree(cache_dir)
85
 
86
  # NOTE: This is temporary during the move of concepts into the pip package. Once all the demos
87
  # have been updated, this block can be deleted.
88
- old_lilac_concepts_data_dir = os.path.join(data_path(), CONCEPTS_DIR, 'lilac')
89
  if os.path.exists(old_lilac_concepts_data_dir):
90
  shutil.rmtree(old_lilac_concepts_data_dir)
91
 
@@ -102,7 +102,8 @@ def main() -> None:
102
  continue
103
  spaces_concept_output_dir = get_concept_output_dir(spaces_data_dir, concept.namespace,
104
  concept.name)
105
- persistent_output_dir = get_concept_output_dir(data_path(), concept.namespace, concept.name)
 
106
  shutil.rmtree(persistent_output_dir, ignore_errors=True)
107
  shutil.copytree(spaces_concept_output_dir, persistent_output_dir, dirs_exist_ok=True)
108
  shutil.rmtree(spaces_concept_output_dir, ignore_errors=True)
 
8
  from huggingface_hub import scan_cache_dir, snapshot_download
9
 
10
  from lilac.concepts.db_concept import CONCEPTS_DIR, DiskConceptDB, get_concept_output_dir
11
+ from lilac.env import env, get_project_dir
12
  from lilac.project import PROJECT_CONFIG_FILENAME
13
  from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
14
 
 
67
  repo_id=lilac_hf_dataset,
68
  repo_type='dataset',
69
  token=env('HF_ACCESS_TOKEN'),
70
+ local_dir=get_datasets_dir(get_project_dir()),
71
  ignore_patterns=['.gitattributes', 'README.md'])
72
 
73
  snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
 
76
  # Copy the config file.
77
  project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
78
  if os.path.exists(project_config_file):
79
+ shutil.copy(project_config_file, os.path.join(get_project_dir(), PROJECT_CONFIG_FILENAME))
80
 
81
  # Delete cache files from persistent storage.
82
+ cache_dir = get_lilac_cache_dir(get_project_dir())
83
  if os.path.exists(cache_dir):
84
  shutil.rmtree(cache_dir)
85
 
86
  # NOTE: This is temporary during the move of concepts into the pip package. Once all the demos
87
  # have been updated, this block can be deleted.
88
+ old_lilac_concepts_data_dir = os.path.join(get_project_dir(), CONCEPTS_DIR, 'lilac')
89
  if os.path.exists(old_lilac_concepts_data_dir):
90
  shutil.rmtree(old_lilac_concepts_data_dir)
91
 
 
102
  continue
103
  spaces_concept_output_dir = get_concept_output_dir(spaces_data_dir, concept.namespace,
104
  concept.name)
105
+ persistent_output_dir = get_concept_output_dir(get_project_dir(), concept.namespace,
106
+ concept.name)
107
  shutil.rmtree(persistent_output_dir, ignore_errors=True)
108
  shutil.copytree(spaces_concept_output_dir, persistent_output_dir, dirs_exist_ok=True)
109
  shutil.rmtree(spaces_concept_output_dir, ignore_errors=True)