palexis3 committed on
Commit
44cb1f0
•
1 Parent(s): 8a18fd2

Fixed transactions rag jupyter notebook errors

Browse files
app/categorization/categorizer.py CHANGED
@@ -3,6 +3,7 @@ import re
3
  import ast
4
  import json
5
  import logging
 
6
  from typing import Any, List, Tuple, Optional, Dict, Union
7
 
8
  # Third-party library imports
@@ -14,12 +15,12 @@ from tenacity import retry, wait_random_exponential, stop_after_attempt
14
  from pydantic import ValidationError
15
 
16
  # Local application/library specific imports
17
- from langchain.chat_models import ChatOpenAI
18
  from langchain.chains import LLMChain
19
  from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
20
  from langchain.prompts import PromptTemplate
21
- import template as template
22
- from config import CATEGORY_REFERENCE_OUTPUT_FILE, TX_PER_LLM_RUN
23
 
24
 
25
  def fuzzy_match_list_categorizer(
@@ -68,12 +69,14 @@ async def llm_list_categorizer(tx_list: pd.DataFrame) -> pd.DataFrame:
68
  """
69
 
70
  # Initialize language model and prompt
71
- llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125")
72
- prompt = PromptTemplate.from_template(template=template.CATEGORY_TEMPLATE)
 
 
73
  chain = LLMChain(llm=llm, prompt=prompt)
74
 
75
  # Iterate over the DataFrame in batches of TX_PER_LLM_RUN transactions
76
- tasks = [llm_sublist_categorizer(tx_list.attrs['file_name'], chain=chain, tx_descriptions="\n".join(chunk['description']).strip())
77
  for chunk in np.array_split(tx_list, tx_list.shape[0] // TX_PER_LLM_RUN + 1)]
78
 
79
  # Gather results and extract (valid) outputs
@@ -87,7 +90,7 @@ async def llm_list_categorizer(tx_list: pd.DataFrame) -> pd.DataFrame:
87
  valid_outputs = [output for valid_result in valid_results for output in valid_result]
88
 
89
  # Return a DataFrame with the valid outputs
90
- return pd.DataFrame(valid_outputs, columns=['description', 'category'])
91
 
92
 
93
  @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
 
3
  import ast
4
  import json
5
  import logging
6
+ import os
7
  from typing import Any, List, Tuple, Optional, Dict, Union
8
 
9
  # Third-party library imports
 
15
  from pydantic import ValidationError
16
 
17
  # Local application/library specific imports
18
+ from langchain_openai import ChatOpenAI
19
  from langchain.chains import LLMChain
20
  from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
21
  from langchain.prompts import PromptTemplate
22
+ import app.categorization.template as CATEGORY_TEMPLATE
23
+ from app.categorization.config import CATEGORY_REFERENCE_OUTPUT_FILE, TX_PER_LLM_RUN
24
 
25
 
26
  def fuzzy_match_list_categorizer(
 
69
  """
70
 
71
  # Initialize language model and prompt
72
+ # openai_api_key = os.environ['OPENAI_API_KEY']
73
+ # print("apikey: " + openai_api_key)
74
+ llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125", api_key="sk-codepath-group-project-3WwlG0qG8GDG7SMVjgFLT3BlbkFJOHIlVsq0UXFqaOV7hl3O")
75
+ prompt = PromptTemplate.from_template(template=CATEGORY_TEMPLATE)
76
  chain = LLMChain(llm=llm, prompt=prompt)
77
 
78
  # Iterate over the DataFrame in batches of TX_PER_LLM_RUN transactions
79
+ tasks = [llm_sublist_categorizer(tx_list.attrs['file_name'], chain=chain, tx_descriptions="\n".join(chunk['name/description']).strip())
80
  for chunk in np.array_split(tx_list, tx_list.shape[0] // TX_PER_LLM_RUN + 1)]
81
 
82
  # Gather results and extract (valid) outputs
 
90
  valid_outputs = [output for valid_result in valid_results for output in valid_result]
91
 
92
  # Return a DataFrame with the valid outputs
93
+ return pd.DataFrame(valid_outputs, columns=['name/description', 'category'])
94
 
95
 
96
  @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
app/categorization/categorizer_list.py CHANGED
@@ -6,8 +6,8 @@ from datetime import datetime
6
  import pandas as pd
7
 
8
  # Local application/library specific imports
9
- from config import CATEGORY_REFERENCE_OUTPUT_FILE
10
- from categorizer import llm_list_categorizer, fuzzy_match_list_categorizer
11
 
12
 
13
  async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
@@ -29,14 +29,14 @@ async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
29
  if os.path.exists(CATEGORY_REFERENCE_OUTPUT_FILE):
30
  # Read description-category pairs from the reference file
31
  description_category_pairs = pd.read_csv(
32
- CATEGORY_REFERENCE_OUTPUT_FILE, header=None, names=['description', 'category']
33
  )
34
 
35
  # Extract only descriptions for faster matching
36
- descriptions = description_category_pairs['description'].values
37
 
38
  # Use fuzzy matching to find similar descriptions and assign the category
39
- tx_list['category'] = tx_list['description'].apply(
40
  fuzzy_match_list_categorizer,
41
  args=(descriptions, description_category_pairs),
42
  )
@@ -44,14 +44,14 @@ async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
44
  # Filter out uncategorized transactions, deduplicate, and sort by description
45
  uncategorized_descriptions = (
46
  tx_list[tx_list['category'].isnull()]
47
- .drop_duplicates(subset=['description'])
48
- .sort_values(by=['description'])
49
  )
50
 
51
  # Ask the language model to categorize the remaining descriptions
52
  if not uncategorized_descriptions.empty:
53
  categorized_descriptions = await llm_list_categorizer(
54
- uncategorized_descriptions[['description', 'category']]
55
  )
56
 
57
  categorized_descriptions.dropna(inplace=True)
@@ -59,8 +59,8 @@ async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
59
  # Update the category for uncategorized transactions based on the language model results
60
  if not categorized_descriptions.empty:
61
  tx_list['category'] = tx_list['category'].fillna(
62
- tx_list['description'].map(
63
- categorized_descriptions.set_index('description')['category']
64
  )
65
  )
66
 
 
6
  import pandas as pd
7
 
8
  # Local application/library specific imports
9
+ from app.categorization.config import CATEGORY_REFERENCE_OUTPUT_FILE
10
+ from app.categorization.categorizer import llm_list_categorizer, fuzzy_match_list_categorizer
11
 
12
 
13
  async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
 
29
  if os.path.exists(CATEGORY_REFERENCE_OUTPUT_FILE):
30
  # Read description-category pairs from the reference file
31
  description_category_pairs = pd.read_csv(
32
+ CATEGORY_REFERENCE_OUTPUT_FILE, header=None, names=['name/description', 'category']
33
  )
34
 
35
  # Extract only descriptions for faster matching
36
+ descriptions = description_category_pairs['name/description'].values
37
 
38
  # Use fuzzy matching to find similar descriptions and assign the category
39
+ tx_list['category'] = tx_list['name/description'].apply(
40
  fuzzy_match_list_categorizer,
41
  args=(descriptions, description_category_pairs),
42
  )
 
44
  # Filter out uncategorized transactions, deduplicate, and sort by description
45
  uncategorized_descriptions = (
46
  tx_list[tx_list['category'].isnull()]
47
+ .drop_duplicates(subset=['name/description'])
48
+ .sort_values(by=['name/description'])
49
  )
50
 
51
  # Ask the language model to categorize the remaining descriptions
52
  if not uncategorized_descriptions.empty:
53
  categorized_descriptions = await llm_list_categorizer(
54
+ uncategorized_descriptions[['name/description', 'category']]
55
  )
56
 
57
  categorized_descriptions.dropna(inplace=True)
 
59
  # Update the category for uncategorized transactions based on the language model results
60
  if not categorized_descriptions.empty:
61
  tx_list['category'] = tx_list['category'].fillna(
62
+ tx_list['name/description'].map(
63
+ categorized_descriptions.set_index('name/description')['category']
64
  )
65
  )
66
 
app/categorization/file_processing.py CHANGED
@@ -9,8 +9,8 @@ from datetime import datetime
9
  import pandas as pd
10
  from dateparser import parse
11
 
12
- from categorizer_list import categorize_list
13
- from config import RESULT_OUTPUT_FILE, CATEGORY_REFERENCE_OUTPUT_FILE
14
 
15
  # Read file and process it (e.g. categorize transactions)
16
  async def process_file(file_path: str) -> Dict[str, Union[str, pd.DataFrame]]:
 
9
  import pandas as pd
10
  from dateparser import parse
11
 
12
+ from app.categorization.categorizer_list import categorize_list
13
+ from app.categorization.config import RESULT_OUTPUT_FILE, CATEGORY_REFERENCE_OUTPUT_FILE
14
 
15
  # Read file and process it (e.g. categorize transactions)
16
  async def process_file(file_path: str) -> Dict[str, Union[str, pd.DataFrame]]:
app/transactions_rag/categorize_transactions.ipynb CHANGED
@@ -134,40 +134,411 @@
134
  },
135
  {
136
  "cell_type": "code",
137
- "execution_count": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  "metadata": {},
139
  "outputs": [
140
  {
141
- "ename": "ModuleNotFoundError",
142
- "evalue": "No module named 'categorization'",
143
- "output_type": "error",
144
- "traceback": [
145
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
146
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
147
- "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Process the transactions csv and get the results from the categorization llm utility\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcategorization\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfile_processing\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m process_file, save_results\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdotenv\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dotenv\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01masyncio\u001b[39;00m\n",
148
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'categorization'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  ]
150
  }
151
  ],
152
  "source": [
153
  "# Process the transactions csv and get the results from the categorization llm utility\n",
154
- "from categorization.file_processing import process_file, save_results\n",
155
- "from dotenv import load_dotenv\n",
 
156
  "import asyncio\n",
 
 
 
157
  "\n",
158
- "load_dotenv()\n",
 
159
  "\n",
160
  "async def apply_categorization():\n",
161
- " processed_file = process_file(\"transactions_2024.csv\")\n",
162
  "\n",
163
  " print(\"\\nProcessing file\")\n",
164
  " result = await asyncio.gather(processed_file)\n",
165
  "\n",
166
- " save_results(results)\n",
167
- " print(results)\n",
168
- " \n",
 
 
 
169
  "\n",
170
- "asyncio.run(apply_categorization())"
 
 
171
  ]
172
  }
173
  ],
 
134
  },
135
  {
136
  "cell_type": "code",
137
+ "execution_count": 15,
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "!python3 -m venv venv"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 16,
147
+ "metadata": {},
148
+ "outputs": [],
149
+ "source": [
150
+ "!source venv/bin/activate"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": 23,
156
+ "metadata": {},
157
+ "outputs": [
158
+ {
159
+ "name": "stdout",
160
+ "output_type": "stream",
161
+ "text": [
162
+ "Defaulting to user installation because normal site-packages is not writeable\n",
163
+ "Requirement already satisfied: setuptools in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (70.0.0)\n"
164
+ ]
165
+ }
166
+ ],
167
+ "source": [
168
+ "!pip3 install --upgrade setuptools"
169
+ ]
170
+ },
171
+ {
172
+ "cell_type": "code",
173
+ "execution_count": 2,
174
+ "metadata": {},
175
+ "outputs": [
176
+ {
177
+ "data": {
178
+ "text/plain": [
179
+ "['Defaulting to user installation because normal site-packages is not writeable',\n",
180
+ " 'Requirement already satisfied: langchain in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 1)) (0.2.1)',\n",
181
+ " 'Collecting langchain_openai (from -r requirements.txt (line 2))',\n",
182
+ " ' Downloading langchain_openai-0.1.7-py3-none-any.whl.metadata (2.5 kB)',\n",
183
+ " 'Collecting python-dotenv (from -r requirements.txt (line 3))',\n",
184
+ " ' Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)',\n",
185
+ " 'Collecting openai (from -r requirements.txt (line 4))',\n",
186
+ " ' Using cached openai-1.30.4-py3-none-any.whl.metadata (21 kB)',\n",
187
+ " 'Requirement already satisfied: tenacity in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 5)) (8.3.0)',\n",
188
+ " 'Requirement already satisfied: rapidfuzz in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 6)) (3.9.1)',\n",
189
+ " 'Requirement already satisfied: pydantic in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 7)) (2.7.1)',\n",
190
+ " 'Requirement already satisfied: dateparser in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 8)) (1.2.0)',\n",
191
+ " 'Requirement already satisfied: pandas in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 9)) (2.0.3)',\n",
192
+ " 'Collecting path (from -r requirements.txt (line 10))',\n",
193
+ " ' Using cached path-16.14.0-py3-none-any.whl.metadata (6.3 kB)',\n",
194
+ " 'Collecting aiostream==0.5.2 (from -r requirements.txt (line 12))',\n",
195
+ " ' Using cached aiostream-0.5.2-py3-none-any.whl.metadata (9.9 kB)',\n",
196
+ " 'Collecting cachetools==5.3.3 (from -r requirements.txt (line 13))',\n",
197
+ " ' Using cached cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)',\n",
198
+ " 'Collecting docx2txt==0.8 (from -r requirements.txt (line 14))',\n",
199
+ " ' Using cached docx2txt-0.8.tar.gz (2.8 kB)',\n",
200
+ " ' Preparing metadata (setup.py): started',\n",
201
+ " \" Preparing metadata (setup.py): finished with status 'done'\",\n",
202
+ " 'Collecting fastapi==0.109.1 (from -r requirements.txt (line 15))',\n",
203
+ " ' Using cached fastapi-0.109.1-py3-none-any.whl.metadata (25 kB)',\n",
204
+ " 'Collecting llama-index-agent-openai==0.2.2 (from -r requirements.txt (line 16))',\n",
205
+ " ' Using cached llama_index_agent_openai-0.2.2-py3-none-any.whl.metadata (677 bytes)',\n",
206
+ " 'Collecting llama-index-core==0.10.28 (from -r requirements.txt (line 17))',\n",
207
+ " ' Using cached llama_index_core-0.10.28-py3-none-any.whl.metadata (3.6 kB)',\n",
208
+ " 'Collecting llama-index-vector-stores-pinecone==0.1.3 (from -r requirements.txt (line 18))',\n",
209
+ " ' Using cached llama_index_vector_stores_pinecone-0.1.3-py3-none-any.whl.metadata (725 bytes)',\n",
210
+ " 'Collecting llama-index==0.10.28 (from -r requirements.txt (line 19))',\n",
211
+ " ' Using cached llama_index-0.10.28-py3-none-any.whl.metadata (11 kB)',\n",
212
+ " 'Collecting python-dotenv (from -r requirements.txt (line 3))',\n",
213
+ " ' Using cached python_dotenv-1.0.0-py3-none-any.whl.metadata (21 kB)',\n",
214
+ " '\\x1b[31mERROR: Ignored the following versions that require a different python version: 0.0.10 Requires-Python >=3.9,<4.0; 0.0.11 Requires-Python >=3.9,<4.0; 0.0.12 Requires-Python >=3.9,<4.0; 0.0.13 Requires-Python >=3.9,<4.0; 0.0.14 Requires-Python >=3.9,<4.0; 0.0.15 Requires-Python >=3.9,<4.0; 0.0.16 Requires-Python >=3.9,<4.0; 0.0.17 Requires-Python >=3.9,<4.0; 0.0.18 Requires-Python >=3.9,<4.0; 0.0.19 Requires-Python >=3.9,<4.0; 0.0.20 Requires-Python >=3.9,<4.0; 0.0.21 Requires-Python >=3.9,<4.0; 0.0.22 Requires-Python >=3.9,<4.0; 0.0.23 Requires-Python >=3.9,<4.0; 0.0.24 Requires-Python >=3.9,<4.0; 0.0.25 Requires-Python >=3.9,<4.0; 0.14.3 Requires-Python >=3.9,<4; 0.14.4 Requires-Python <4,>=3.9; 0.14.5 Requires-Python <4,>=3.9; 0.15.0 Requires-Python <4,>=3.9; 0.15.1 Requires-Python <4,>=3.9; 0.15.10 Requires-Python <4,>=3.9; 0.15.11 Requires-Python <4,>=3.9; 0.15.12 Requires-Python <4,>=3.9; 0.15.13 Requires-Python <4,>=3.9; 0.15.2 Requires-Python <4,>=3.9; 0.15.3 Requires-Python <4,>=3.9; 0.15.4 Requires-Python <4,>=3.9; 0.15.5 Requires-Python <4,>=3.9; 0.15.6 Requires-Python <4,>=3.9; 0.15.7 Requires-Python <4,>=3.9; 0.15.8 Requires-Python <4,>=3.9; 0.15.9 Requires-Python <4,>=3.9; 0.16.0 Requires-Python <4,>=3.9; 0.16.1 Requires-Python <4,>=3.9; 0.16.2 Requires-Python <4,>=3.9; 0.16.3 Requires-Python <4,>=3.9; 0.16.4 Requires-Python <4,>=3.9; 0.16.5 Requires-Python <4,>=3.9; 0.16.6 Requires-Python <4,>=3.9; 0.16.7 Requires-Python <4,>=3.9; 0.16.8 Requires-Python <4,>=3.9; 0.16.9 Requires-Python <4,>=3.9; 0.17.0 Requires-Python <4,>=3.9; 0.17.1 Requires-Python <4,>=3.9; 0.17.2 Requires-Python <4,>=3.9; 0.17.3 Requires-Python <4,>=3.9; 0.17.4 Requires-Python <4,>=3.9; 0.17.5 Requires-Python <4,>=3.9; 0.17.6 Requires-Python <4,>=3.9; 0.17.7 Requires-Python <4,>=3.9; 0.18.0 Requires-Python <4,>=3.9; 0.18.1 Requires-Python <4,>=3.9; 0.18.2 Requires-Python <4,>=3.9; 0.19.0 Requires-Python <4,>=3.9; 0.20.0 Requires-Python <4,>=3.9; 0.21.0 Requires-Python 
<4,>=3.9\\x1b[0m\\x1b[31m',\n",
215
+ " '\\x1b[0m\\x1b[31mERROR: Could not find a version that satisfies the requirement traceloop-sdk==0.15.11 (from versions: 0.0.26, 0.0.27b0, 0.0.27, 0.0.28, 0.0.29, 0.0.31, 0.0.32, 0.0.33, 0.0.34, 0.0.35, 0.0.36, 0.0.37, 0.0.38, 0.0.39, 0.0.40, 0.0.41, 0.0.42, 0.0.43b0, 0.0.43b2, 0.0.43b3, 0.0.43b4, 0.0.43, 0.0.44b0, 0.0.44, 0.0.46, 0.0.47, 0.0.48, 0.0.49, 0.0.50, 0.0.51, 0.0.52, 0.0.53, 0.0.54, 0.0.55, 0.0.56a0, 0.0.56a1, 0.0.56a2, 0.0.56a3, 0.0.56a4, 0.0.56a5, 0.0.56a6, 0.0.56, 0.0.57, 0.0.58, 0.0.59, 0.0.60a0, 0.0.60a1, 0.0.60, 0.0.61a0, 0.0.61, 0.0.62, 0.0.63, 0.0.64a0, 0.0.64, 0.0.65, 0.0.66, 0.0.67, 0.0.68, 0.0.69, 0.0.70, 0.1.3, 0.1.5, 0.1.6, 0.1.7, 0.1.8, 0.1.9, 0.1.10, 0.1.11, 0.1.12, 0.2.0, 0.2.1, 0.3.0, 0.3.1, 0.3.2, 0.3.3, 0.3.4, 0.3.5, 0.3.6, 0.4.0, 0.4.1, 0.4.2, 0.5.0, 0.5.1, 0.5.2, 0.5.3, 0.6.0, 0.7.0, 0.8.0, 0.8.2, 0.9.0, 0.9.1, 0.9.2, 0.9.3, 0.9.4, 0.10.0, 0.10.1, 0.10.2, 0.10.3, 0.10.4, 0.10.5, 0.11.0, 0.11.1, 0.11.2, 0.11.3, 0.12.0, 0.12.1, 0.12.2, 0.12.3, 0.12.4, 0.12.5, 0.13.0, 0.13.1, 0.13.2, 0.13.3, 0.14.0, 0.14.1, 0.14.2)\\x1b[0m\\x1b[31m',\n",
216
+ " '\\x1b[0m\\x1b[31mERROR: No matching distribution found for traceloop-sdk==0.15.11\\x1b[0m\\x1b[31m',\n",
217
+ " '\\x1b[0m']"
218
+ ]
219
+ },
220
+ "execution_count": 2,
221
+ "metadata": {},
222
+ "output_type": "execute_result"
223
+ }
224
+ ],
225
+ "source": [
226
+ "!!pip3 install -r requirements.txt"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 28,
232
+ "metadata": {},
233
+ "outputs": [
234
+ {
235
+ "name": "stdout",
236
+ "output_type": "stream",
237
+ "text": [
238
+ "Defaulting to user installation because normal site-packages is not writeable\n",
239
+ "Collecting dateparser\n",
240
+ " Using cached dateparser-1.2.0-py2.py3-none-any.whl.metadata (28 kB)\n",
241
+ "Requirement already satisfied: python-dateutil in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from dateparser) (2.9.0.post0)\n",
242
+ "Requirement already satisfied: pytz in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from dateparser) (2024.1)\n",
243
+ "Collecting regex!=2019.02.19,!=2021.8.27 (from dateparser)\n",
244
+ " Downloading regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl.metadata (40 kB)\n",
245
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m95.7 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
246
+ "\u001b[?25hCollecting tzlocal (from dateparser)\n",
247
+ " Downloading tzlocal-5.2-py3-none-any.whl.metadata (7.8 kB)\n",
248
+ "Requirement already satisfied: six>=1.5 in /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/site-packages (from python-dateutil->dateparser) (1.15.0)\n",
249
+ "Collecting backports.zoneinfo (from tzlocal->dateparser)\n",
250
+ " Downloading backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl.metadata (4.7 kB)\n",
251
+ "Downloading dateparser-1.2.0-py2.py3-none-any.whl (294 kB)\n",
252
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m510.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
253
+ "\u001b[?25hDownloading regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl (281 kB)\n",
254
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.8/281.8 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
255
+ "\u001b[?25hDownloading tzlocal-5.2-py3-none-any.whl (17 kB)\n",
256
+ "Downloading backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl (35 kB)\n",
257
+ "Installing collected packages: regex, backports.zoneinfo, tzlocal, dateparser\n",
258
+ "\u001b[33m WARNING: The script dateparser-download is installed in '/Users/patrickalexis/Library/Python/3.8/bin' which is not on PATH.\n",
259
+ " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
260
+ "\u001b[0mSuccessfully installed backports.zoneinfo-0.2.1 dateparser-1.2.0 regex-2024.5.15 tzlocal-5.2\n"
261
+ ]
262
+ }
263
+ ],
264
+ "source": [
265
+ "!pip3 install dateparser"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": 2,
271
+ "metadata": {},
272
+ "outputs": [
273
+ {
274
+ "name": "stdout",
275
+ "output_type": "stream",
276
+ "text": [
277
+ "Defaulting to user installation because normal site-packages is not writeable\n",
278
+ "Collecting rapidfuzz\n",
279
+ " Using cached rapidfuzz-3.9.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata (11 kB)\n",
280
+ "Using cached rapidfuzz-3.9.1-cp38-cp38-macosx_10_9_x86_64.whl (2.1 MB)\n",
281
+ "Installing collected packages: rapidfuzz\n",
282
+ "Successfully installed rapidfuzz-3.9.1\n"
283
+ ]
284
+ }
285
+ ],
286
+ "source": [
287
+ "!pip3 install rapidfuzz"
288
+ ]
289
+ },
290
+ {
291
+ "cell_type": "code",
292
+ "execution_count": 2,
293
  "metadata": {},
294
  "outputs": [
295
  {
296
+ "name": "stdout",
297
+ "output_type": "stream",
298
+ "text": [
299
+ "Defaulting to user installation because normal site-packages is not writeable\n",
300
+ "Collecting dotenv\n",
301
+ " Downloading dotenv-0.0.5.tar.gz (2.4 kB)\n",
302
+ " Preparing metadata (setup.py) ... \u001b[?25lerror\n",
303
+ " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
304
+ " \n",
305
+ " \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
306
+ " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
307
+ " \u001b[31m╰─>\u001b[0m \u001b[31m[76 lines of output]\u001b[0m\n",
308
+ " \u001b[31m \u001b[0m /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/__init__.py:80: _DeprecatedInstaller: setuptools.installer and fetch_build_eggs are deprecated.\n",
309
+ " \u001b[31m \u001b[0m !!\n",
310
+ " \u001b[31m \u001b[0m \n",
311
+ " \u001b[31m \u001b[0m ********************************************************************************\n",
312
+ " \u001b[31m \u001b[0m Requirements should be satisfied by a PEP 517 installer.\n",
313
+ " \u001b[31m \u001b[0m If you are using pip, you can try `pip install --use-pep517`.\n",
314
+ " \u001b[31m \u001b[0m ********************************************************************************\n",
315
+ " \u001b[31m \u001b[0m \n",
316
+ " \u001b[31m \u001b[0m !!\n",
317
+ " \u001b[31m \u001b[0m dist.fetch_build_eggs(dist.setup_requires)\n",
318
+ " \u001b[31m \u001b[0m \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
319
+ " \u001b[31m \u001b[0m \n",
320
+ " \u001b[31m \u001b[0m \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
321
+ " \u001b[31m \u001b[0m \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
322
+ " \u001b[31m \u001b[0m \u001b[31m╰─>\u001b[0m \u001b[31m[16 lines of output]\u001b[0m\n",
323
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m Traceback (most recent call last):\n",
324
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"<string>\", line 2, in <module>\n",
325
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"<pip-setuptools-caller>\", line 14, in <module>\n",
326
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/__init__.py\", line 2, in <module>\n",
327
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m from setuptools.extension import Extension, Library\n",
328
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/extension.py\", line 5, in <module>\n",
329
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m from setuptools.dist import _get_unpatched\n",
330
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/dist.py\", line 7, in <module>\n",
331
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m from setuptools.command.install import install\n",
332
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/command/__init__.py\", line 8, in <module>\n",
333
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m from setuptools.command import install_scripts\n",
334
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/command/install_scripts.py\", line 3, in <module>\n",
335
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m from pkg_resources import Distribution, PathMetadata, ensure_directory\n",
336
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/pkg_resources.py\", line 1518, in <module>\n",
337
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m register_loader_type(importlib_bootstrap.SourceFileLoader, DefaultProvider)\n",
338
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m AttributeError: module 'importlib._bootstrap' has no attribute 'SourceFileLoader'\n",
339
+ " \u001b[31m \u001b[0m \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n",
340
+ " \u001b[31m \u001b[0m \n",
341
+ " \u001b[31m \u001b[0m \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
342
+ " \u001b[31m \u001b[0m \u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
343
+ " \u001b[31m \u001b[0m \n",
344
+ " \u001b[31m \u001b[0m \u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
345
+ " \u001b[31m \u001b[0m \u001b[31m╰─>\u001b[0m See above for output.\n",
346
+ " \u001b[31m \u001b[0m \n",
347
+ " \u001b[31m \u001b[0m \u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
348
+ " \u001b[31m \u001b[0m \u001b[1;36mhint\u001b[0m: See above for details.\n",
349
+ " \u001b[31m \u001b[0m Traceback (most recent call last):\n",
350
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/installer.py\", line 101, in _fetch_build_egg_no_warn\n",
351
+ " \u001b[31m \u001b[0m subprocess.check_call(cmd)\n",
352
+ " \u001b[31m \u001b[0m File \"/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/subprocess.py\", line 364, in check_call\n",
353
+ " \u001b[31m \u001b[0m raise CalledProcessError(retcode, cmd)\n",
354
+ " \u001b[31m \u001b[0m subprocess.CalledProcessError: Command '['/Applications/Xcode.app/Contents/Developer/usr/bin/python3', '-m', 'pip', '--disable-pip-version-check', 'wheel', '--no-deps', '-w', '/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/tmpnw2yadh3', '--quiet', 'distribute']' returned non-zero exit status 1.\n",
355
+ " \u001b[31m \u001b[0m \n",
356
+ " \u001b[31m \u001b[0m The above exception was the direct cause of the following exception:\n",
357
+ " \u001b[31m \u001b[0m \n",
358
+ " \u001b[31m \u001b[0m Traceback (most recent call last):\n",
359
+ " \u001b[31m \u001b[0m File \"<string>\", line 2, in <module>\n",
360
+ " \u001b[31m \u001b[0m File \"<pip-setuptools-caller>\", line 34, in <module>\n",
361
+ " \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-install-ac3_ml88/dotenv_7766945cfd7d4e62965375b30e9a0c21/setup.py\", line 13, in <module>\n",
362
+ " \u001b[31m \u001b[0m setup(name='dotenv',\n",
363
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/__init__.py\", line 102, in setup\n",
364
+ " \u001b[31m \u001b[0m _install_setup_requires(attrs)\n",
365
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/__init__.py\", line 75, in _install_setup_requires\n",
366
+ " \u001b[31m \u001b[0m _fetch_build_eggs(dist)\n",
367
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/__init__.py\", line 80, in _fetch_build_eggs\n",
368
+ " \u001b[31m \u001b[0m dist.fetch_build_eggs(dist.setup_requires)\n",
369
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/dist.py\", line 641, in fetch_build_eggs\n",
370
+ " \u001b[31m \u001b[0m return _fetch_build_eggs(self, requires)\n",
371
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/installer.py\", line 38, in _fetch_build_eggs\n",
372
+ " \u001b[31m \u001b[0m resolved_dists = pkg_resources.working_set.resolve(\n",
373
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/pkg_resources/__init__.py\", line 787, in resolve\n",
374
+ " \u001b[31m \u001b[0m dist = self._resolve_dist(\n",
375
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/pkg_resources/__init__.py\", line 823, in _resolve_dist\n",
376
+ " \u001b[31m \u001b[0m dist = best[req.key] = env.best_match(\n",
377
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/pkg_resources/__init__.py\", line 1093, in best_match\n",
378
+ " \u001b[31m \u001b[0m return self.obtain(req, installer)\n",
379
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/pkg_resources/__init__.py\", line 1104, in obtain\n",
380
+ " \u001b[31m \u001b[0m return installer(requirement) if installer else None\n",
381
+ " \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/installer.py\", line 103, in _fetch_build_egg_no_warn\n",
382
+ " \u001b[31m \u001b[0m raise DistutilsError(str(e)) from e\n",
383
+ " \u001b[31m \u001b[0m distutils.errors.DistutilsError: Command '['/Applications/Xcode.app/Contents/Developer/usr/bin/python3', '-m', 'pip', '--disable-pip-version-check', 'wheel', '--no-deps', '-w', '/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/tmpnw2yadh3', '--quiet', 'distribute']' returned non-zero exit status 1.\n",
384
+ " \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n",
385
+ " \n",
386
+ " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
387
+ "\u001b[?25h\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
388
+ "\n",
389
+ "\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
390
+ "\u001b[31m╰─>\u001b[0m See above for output.\n",
391
+ "\n",
392
+ "\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
393
+ "\u001b[1;36mhint\u001b[0m: See above for details.\n"
394
+ ]
395
+ }
396
+ ],
397
+ "source": [
398
+ "!pip3 install dotenv"
399
+ ]
400
+ },
401
+ {
402
+ "cell_type": "code",
403
+ "execution_count": 10,
404
+ "metadata": {},
405
+ "outputs": [],
406
+ "source": [
407
+ "# Create data folder\n",
408
+ "!mkdir -p data/tx_data/output\n",
409
+ "!mkdir -p data/ref_data"
410
+ ]
411
+ },
412
+ {
413
+ "cell_type": "code",
414
+ "execution_count": 4,
415
+ "metadata": {},
416
+ "outputs": [
417
+ {
418
+ "name": "stdout",
419
+ "output_type": "stream",
420
+ "text": [
421
+ "Defaulting to user installation because normal site-packages is not writeable\n",
422
+ "Collecting openai\n",
423
+ " Using cached openai-1.30.4-py3-none-any.whl.metadata (21 kB)\n",
424
+ "Collecting anyio<5,>=3.5.0 (from openai)\n",
425
+ " Downloading anyio-4.4.0-py3-none-any.whl.metadata (4.6 kB)\n",
426
+ "Collecting distro<2,>=1.7.0 (from openai)\n",
427
+ " Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n",
428
+ "Collecting httpx<1,>=0.23.0 (from openai)\n",
429
+ " Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)\n",
430
+ "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from openai) (2.7.1)\n",
431
+ "Collecting sniffio (from openai)\n",
432
+ " Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n",
433
+ "Requirement already satisfied: tqdm>4 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from openai) (4.66.4)\n",
434
+ "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from openai) (4.12.0)\n",
435
+ "Requirement already satisfied: idna>=2.8 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from anyio<5,>=3.5.0->openai) (3.7)\n",
436
+ "Collecting exceptiongroup>=1.0.2 (from anyio<5,>=3.5.0->openai)\n",
437
+ " Downloading exceptiongroup-1.2.1-py3-none-any.whl.metadata (6.6 kB)\n",
438
+ "Requirement already satisfied: certifi in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from httpx<1,>=0.23.0->openai) (2024.2.2)\n",
439
+ "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)\n",
440
+ " Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n",
441
+ "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)\n",
442
+ " Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n",
443
+ "Requirement already satisfied: annotated-types>=0.4.0 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n",
444
+ "Requirement already satisfied: pydantic-core==2.18.2 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from pydantic<3,>=1.9.0->openai) (2.18.2)\n",
445
+ "Downloading openai-1.30.4-py3-none-any.whl (320 kB)\n",
446
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m320.6/320.6 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
447
+ "\u001b[?25hDownloading anyio-4.4.0-py3-none-any.whl (86 kB)\n",
448
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
449
+ "\u001b[?25hDownloading distro-1.9.0-py3-none-any.whl (20 kB)\n",
450
+ "Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n",
451
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
452
+ "\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
453
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
454
+ "\u001b[?25hDownloading sniffio-1.3.1-py3-none-any.whl (10 kB)\n",
455
+ "Downloading exceptiongroup-1.2.1-py3-none-any.whl (16 kB)\n",
456
+ "Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
457
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
458
+ "\u001b[?25hInstalling collected packages: sniffio, h11, exceptiongroup, distro, httpcore, anyio, httpx, openai\n",
459
+ "\u001b[33m WARNING: The script distro is installed in '/Users/patrickalexis/Library/Python/3.8/bin' which is not on PATH.\n",
460
+ " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
461
+ "\u001b[0m\u001b[33m WARNING: The script httpx is installed in '/Users/patrickalexis/Library/Python/3.8/bin' which is not on PATH.\n",
462
+ " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
463
+ "\u001b[0m\u001b[33m WARNING: The script openai is installed in '/Users/patrickalexis/Library/Python/3.8/bin' which is not on PATH.\n",
464
+ " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
465
+ "\u001b[0mSuccessfully installed anyio-4.4.0 distro-1.9.0 exceptiongroup-1.2.1 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 openai-1.30.4 sniffio-1.3.1\n"
466
+ ]
467
+ }
468
+ ],
469
+ "source": [
470
+ "!pip3 install openai"
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "execution_count": 5,
476
+ "metadata": {},
477
+ "outputs": [
478
+ {
479
+ "name": "stdout",
480
+ "output_type": "stream",
481
+ "text": [
482
+ "\n",
483
+ "Processing file\n"
484
+ ]
485
+ },
486
+ {
487
+ "name": "stderr",
488
+ "output_type": "stream",
489
+ "text": [
490
+ "ERROR:root:| File: transactions_2024.csv | Unexpected Error: expected str, got module\n"
491
+ ]
492
+ },
493
+ {
494
+ "name": "stdout",
495
+ "output_type": "stream",
496
+ "text": [
497
+ "ERROR processing file transactions_2024.csv: expected str, got module\n",
498
+ "\n",
499
+ "Processed 1 files: 0 successful, 1 with errors\n",
500
+ "\n",
501
+ "Errors in the following files:\n",
502
+ " transactions_2024.csv: expected str, got module\n",
503
+ "\n",
504
+ "\n",
505
+ "[{'file_name': 'transactions_2024.csv', 'output': Empty DataFrame\n",
506
+ "Columns: []\n",
507
+ "Index: [], 'error': 'expected str, got module'}]\n",
508
+ "['name/description,category\\n']\n",
509
+ "None\n"
510
  ]
511
  }
512
  ],
513
  "source": [
514
  "# Process the transactions csv and get the results from the categorization llm utility\n",
515
+ "from app.categorization.file_processing import process_file, save_results\n",
516
+ "from app.categorization.config import CATEGORY_REFERENCE_OUTPUT_FILE\n",
517
+ "# from dotenv import load_dotenv\n",
518
  "import asyncio\n",
519
+ "import os.path\n",
520
+ "\n",
521
+ "# load_dotenv()\n",
522
  "\n",
523
+ "# relative_dir = os.path.dirname(__file__)\n",
524
+ "# abs_file_path = os.path.join(relative_dir, CATEGORY_REFERENCE_OUTPUT_FILE)\n",
525
  "\n",
526
  "async def apply_categorization():\n",
527
+ " processed_file = process_file(\"app/transactions_rag/transactions_2024.csv\")\n",
528
  "\n",
529
  " print(\"\\nProcessing file\")\n",
530
  " result = await asyncio.gather(processed_file)\n",
531
  "\n",
532
+ " save_results(result)\n",
533
+ " print(result)\n",
534
+ "\n",
535
+ " output_file = open(CATEGORY_REFERENCE_OUTPUT_FILE, \"r+\")\n",
536
+ " print(output_file.readlines())\n",
537
+ "\n",
538
  "\n",
539
+ "result = await apply_categorization()\n",
540
+ "print(result)\n",
541
+ "\n"
542
  ]
543
  }
544
  ],
requirements.txt CHANGED
@@ -1,3 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
1
  aiostream==0.5.2
2
  cachetools==5.3.3
3
  docx2txt==0.8
@@ -8,14 +19,4 @@ llama-index-vector-stores-pinecone==0.1.3
8
  llama-index==0.10.28
9
  python-dotenv==1.0.0
10
  traceloop-sdk==0.15.11
11
- uvicorn==0.23.2
12
-
13
- langchain
14
- python-dotenv
15
- openai
16
- tenacity
17
- rapidfuzz
18
- pydantic
19
- dateparser
20
- pandas
21
- path
 
1
+ langchain
2
+ langchain_openai
3
+ python-dotenv
4
+ openai
5
+ tenacity
6
+ rapidfuzz
7
+ pydantic
8
+ dateparser
9
+ pandas
10
+ path
11
+
12
  aiostream==0.5.2
13
  cachetools==5.3.3
14
  docx2txt==0.8
 
19
  llama-index==0.10.28
20
  python-dotenv==1.0.0
21
  traceloop-sdk==0.15.11
22
+ uvicorn==0.23.2