lstetson committed on
Commit
703bede
1 Parent(s): 7962541

Delete TESTING.ipynb

Files changed (1)
  1. TESTING.ipynb +0 -165
TESTING.ipynb DELETED
@@ -1,165 +0,0 @@
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from models import etl"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "etl.main(json_path='data/single_video.json', db='data/single_video.db', batch_size=5, overlap=2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import chromadb\n",
- "from models import etl"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "client = chromadb.PersistentClient('data/single_video.db')\n",
- "collection= client.get_collection('huberman_videos')\n",
- "# collection.count()\n",
- "# collection.peek()\n",
- "\n",
- "query_text = \"What are the components of an LLM?\"\n",
- "query_embedding = etl.embed_text(query_text)\n",
- "results = collection.query(query_texts=[query_text], n_results=5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'ids': [['5sLYAQS9sWQ__33',\n",
- " '5sLYAQS9sWQ__36',\n",
- " '5sLYAQS9sWQ__3',\n",
- " '5sLYAQS9sWQ__6',\n",
- " '5sLYAQS9sWQ__27']],\n",
- " 'distances': [[0.27329726119651687,\n",
- " 0.3594438065792097,\n",
- " 0.4730243492988927,\n",
- " 0.5004446084705303,\n",
- " 0.5766584257317211]],\n",
- " 'metadatas': [[{'segment_id': '5sLYAQS9sWQ__33',\n",
- " 'source': 'https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=145.328s',\n",
- " 'title': 'How Large Language Models Work',\n",
- " 'video_id': '5sLYAQS9sWQ'},\n",
- " {'segment_id': '5sLYAQS9sWQ__36',\n",
- " 'source': 'https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=154.367s',\n",
- " 'title': 'How Large Language Models Work',\n",
- " 'video_id': '5sLYAQS9sWQ'},\n",
- " {'segment_id': '5sLYAQS9sWQ__3',\n",
- " 'source': 'https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=10.783s',\n",
- " 'title': 'How Large Language Models Work',\n",
- " 'video_id': '5sLYAQS9sWQ'},\n",
- " {'segment_id': '5sLYAQS9sWQ__6',\n",
- " 'source': 'https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=22.544s',\n",
- " 'title': 'How Large Language Models Work',\n",
- " 'video_id': '5sLYAQS9sWQ'},\n",
- " {'segment_id': '5sLYAQS9sWQ__27',\n",
- " 'source': 'https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=117.572s',\n",
- " 'title': 'How Large Language Models Work',\n",
- " 'video_id': '5sLYAQS9sWQ'}]],\n",
- " 'embeddings': None,\n",
- " 'documents': [['All right, so how do they work? Well, we can think of it like this. LLM equals three things: data, architecture, and lastly, we can think of it as training. Those three things are really the components of an LLM.',\n",
- " \"data, architecture, and lastly, we can think of it as training. Those three things are really the components of an LLM. Now, we've already discussed the enormous amounts of text data that goes into these things. As for the architecture, this is a neural network and for GPT that is a transformer.\",\n",
- " 'And I\\'ve been using GPT in its various forms for years. In this video we are going to number 1, ask \"what is an LLM?\" Number 2, we are going to describe how they work. And then number 3,',\n",
- " 'Number 2, we are going to describe how they work. And then number 3, we\\'re going to ask, \"what are the business applications of LLMs?\" So let\\'s start with number 1, \"what is a large language model?\" Well, a large language model',\n",
- " \"Yeah, that's truly a lot of text. And LLMs are also among the biggest models when it comes to parameter count. A parameter is a value the model can change independently as it learns, and the more parameters a model has, the more complex it can be. GPT-3, for example, is pre-trained on a corpus of actually 45 terabytes of data,\"]],\n",
- " 'uris': None,\n",
- " 'data': None}"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "results"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CONTEXT: All right, so how do they work? Well, we can think of it like this. LLM equals three things: data, architecture, and lastly, we can think of it as training. Those three things are really the components of an LLM.\n",
- "TITLE: How Large Language Models Work\n",
- "SOURCE: https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=145.328s\n",
- "\n",
- "CONTEXT: data, architecture, and lastly, we can think of it as training. Those three things are really the components of an LLM. Now, we've already discussed the enormous amounts of text data that goes into these things. As for the architecture, this is a neural network and for GPT that is a transformer.\n",
- "TITLE: How Large Language Models Work\n",
- "SOURCE: https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=154.367s\n",
- "\n",
- "CONTEXT: And I've been using GPT in its various forms for years. In this video we are going to number 1, ask \"what is an LLM?\" Number 2, we are going to describe how they work. And then number 3,\n",
- "TITLE: How Large Language Models Work\n",
- "SOURCE: https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=10.783s\n",
- "\n",
- "CONTEXT: Number 2, we are going to describe how they work. And then number 3, we're going to ask, \"what are the business applications of LLMs?\" So let's start with number 1, \"what is a large language model?\" Well, a large language model\n",
- "TITLE: How Large Language Models Work\n",
- "SOURCE: https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=22.544s\n",
- "\n",
- "CONTEXT: Yeah, that's truly a lot of text. And LLMs are also among the biggest models when it comes to parameter count. A parameter is a value the model can change independently as it learns, and the more parameters a model has, the more complex it can be. GPT-3, for example, is pre-trained on a corpus of actually 45 terabytes of data,\n",
- "TITLE: How Large Language Models Work\n",
- "SOURCE: https://www.youtube.com/watch?v=5sLYAQS9sWQ&t=117.572s\n",
- "\n",
- "\n"
- ]
- }
- ],
- "source": [
- "from models.llm import format_context\n",
- "\n",
- "print(format_context(results))"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.1"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }