dsmueller committed on
Commit
a391a44
1 Parent(s): 94afdaa

Updated dependencies, run.py

Browse files
Files changed (4) hide show
  1. app.ipynb +83 -0
  2. poetry.lock +0 -0
  3. pyproject.toml +20 -0
  4. run.py +9 -22
app.ipynb ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "import pickle\n",
18
+ "import datasets\n",
19
+ "from renumics import spotlight\n",
20
+ "from renumics.spotlight import dtypes as spotlight_dtypes\n",
21
+ "import os"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 4,
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "Dataset loaded using datasets.load_dataset().\n"
34
+ ]
35
+ }
36
+ ],
37
+ "source": [
38
+ "dataset_name=\"ai-aerospace/ac-text-embedding-ada-002-ams-test\"\n",
39
+ "dataset = datasets.load_dataset(dataset_name, split=\"train\")\n",
40
+ "print(\"Dataset loaded using datasets.load_dataset().\")"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 5,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "view = spotlight.show(dataset, port=7860, host=\"0.0.0.0\", \n",
50
+ " dtype={\"used_by_questions\": spotlight_dtypes.SequenceDType(spotlight_dtypes.str_dtype)}, \n",
51
+ " allow_filebrowsing=False)"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": null,
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": []
60
+ }
61
+ ],
62
+ "metadata": {
63
+ "kernelspec": {
64
+ "display_name": ".venv",
65
+ "language": "python",
66
+ "name": "python3"
67
+ },
68
+ "language_info": {
69
+ "codemirror_mode": {
70
+ "name": "ipython",
71
+ "version": 3
72
+ },
73
+ "file_extension": ".py",
74
+ "mimetype": "text/x-python",
75
+ "name": "python",
76
+ "nbconvert_exporter": "python",
77
+ "pygments_lexer": "ipython3",
78
+ "version": "3.11.1"
79
+ }
80
+ },
81
+ "nbformat": 4,
82
+ "nbformat_minor": 2
83
+ }
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "aerospace-chatbot-visualize"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Dan Mueller <dsm@danmueller.pro>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = ">=3.11,<3.12"
10
+ datasets = "^2.19.0"
11
+ ipykernel = "^6.29.4"
12
+ renumics-spotlight = "1.6.6"
13
+
14
+
15
+ [tool.poetry.group.dev.dependencies]
16
+ ipykernel = "^6.29.4"
17
+
18
+ [build-system]
19
+ requires = ["poetry-core"]
20
+ build-backend = "poetry.core.masonry.api"
run.py CHANGED
@@ -1,30 +1,17 @@
1
  import pickle
2
  import datasets
3
  from renumics import spotlight
 
4
  import os
5
 
6
  if __name__ == "__main__":
7
- cache_file = "dataset_cache.pkl"
8
- if os.path.exists(cache_file):
9
- # Load dataset from cache
10
- with open(cache_file, "rb") as file:
11
- dataset = pickle.load(file)
12
- print("Dataset loaded from cache.")
13
- else:
14
- # Load dataset using datasets.load_dataset()
15
- dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
16
- print("Dataset loaded using datasets.load_dataset().")
17
-
18
- # Save dataset to cache
19
- with open(cache_file, "wb") as file:
20
- pickle.dump(dataset, file)
21
-
22
- print("Dataset saved to cache.")
23
-
24
 
25
  df = dataset.to_pandas()
26
- df_show = df.drop(columns=['embedding', 'probabilities'])
27
- while True:
28
- view = spotlight.show(df_show.sample(5000, random_state=1), port=7860, host="0.0.0.0",
29
- dtype={"image": spotlight.Image, "embedding_reduced": spotlight.Embedding}, allow_filebrowsing=False)
30
- view.close()
 
1
  import pickle
2
  import datasets
3
  from renumics import spotlight
4
+ from renumics.spotlight import dtypes as spotlight_dtypes
5
  import os
6
 
7
  if __name__ == "__main__":
8
+ dataset_name="ai-aerospace/ac-text-embedding-ada-002-ams-test"
9
+ # Load dataset using datasets.load_dataset()
10
+ dataset = datasets.load_dataset(dataset_name, split="train")
11
+ print("Dataset loaded using datasets.load_dataset().")
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  df = dataset.to_pandas()
14
+
15
+ view = spotlight.show(df, port=7860, host="0.0.0.0",
16
+ dtype={"used_by_questions": spotlight_dtypes.SequenceDType(spotlight_dtypes.str_dtype)},
17
+ allow_filebrowsing=True)