Spaces:

praneeth-hakeem-patrick
/

backend

Sleeping

App Files Community

Praneeth Yerrapragada committed on May 31

Commit

5112690

•

1 Parent(s): ea9333f

chore: remove open_ai_apikey in plain text

Browse files

Files changed (1) hide show

app/categorization/categorizer.py +16 -12

app/categorization/categorizer.py CHANGED Viewed

@@ -46,12 +46,13 @@ def fuzzy_match_list_categorizer(
     """
     # Fuzzy-match this description against the reference descriptions
-    match_results = process.extractOne(description, descriptions, score_cutoff=threshold)
     # If a match is found, return the category of the matched description
     if match_results:
         return description_category_pairs.at[match_results[2], 'category']
     return None
@@ -69,10 +70,10 @@ async def llm_list_categorizer(tx_list: pd.DataFrame) -> pd.DataFrame:
     """
     # Initialize language model and prompt
-    # openai_api_key = os.environ['OPENAI_API_KEY']
-    # print("apikey: " + openai_api_key)
-    llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125", api_key="sk-codepath-group-project-3WwlG0qG8GDG7SMVjgFLT3BlbkFJOHIlVsq0UXFqaOV7hl3O")
-    prompt = PromptTemplate.from_template(template=CATEGORY_TEMPLATE)
     chain = LLMChain(llm=llm, prompt=prompt)
     # Iterate over the DataFrame in batches of TX_PER_LLM_RUN transactions
@@ -87,7 +88,8 @@ async def llm_list_categorizer(tx_list: pd.DataFrame) -> pd.DataFrame:
     valid_results = [result['output'] for result in results if result['valid']]
     # Flatten the list of valid results to obtain a single list of description-category pairs
-    valid_outputs = [output for valid_result in valid_results for output in valid_result]
     # Return a DataFrame with the valid outputs
     return pd.DataFrame(valid_outputs, columns=['name/description', 'category'])
@@ -120,7 +122,7 @@ async def llm_sublist_categorizer(
     try:
         # Create a pattern to match a list Description-Category pairs (List[Tuple[str, str]])
         pattern = r"\['([^']+)', '([^']+)'\]"
         # Use it to extract all the correctly formatted pairs from the raw result
         matches = re.findall(pattern, raw_result.replace("\\'", "'"))
@@ -131,13 +133,15 @@ async def llm_sublist_categorizer(
                 parsed_pair = ast.literal_eval(str(list(match)))
                 valid_outputs.append(parsed_pair)
             except Exception as e:
-                logger.log(logging.ERROR, f"Parsing Error: {e}\nMatch: {match}\n")
                 result['valid'] = False
         result['output'] = valid_outputs
     except Exception as e:
-        logging.log(logging.ERROR, f"| File: {file_name} | Unexpected Error: {e}\nRaw Result: {raw_result}")
         result['valid'] = False
-    return result

     """
     # Fuzzy-match this description against the reference descriptions
+    match_results = process.extractOne(
+        description, descriptions, score_cutoff=threshold)
     # If a match is found, return the category of the matched description
     if match_results:
         return description_category_pairs.at[match_results[2], 'category']
     return None
     """
     # Initialize language model and prompt
+    openai_api_key = os.environ['OPENAI_API_KEY']
+    llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125",
+                     api_key=openai_api_key)
+    prompt = PromptTemplate.from_template(template=CATEGORY_TEMPLATE)
     chain = LLMChain(llm=llm, prompt=prompt)
     # Iterate over the DataFrame in batches of TX_PER_LLM_RUN transactions
     valid_results = [result['output'] for result in results if result['valid']]
     # Flatten the list of valid results to obtain a single list of description-category pairs
+    valid_outputs = [
+        output for valid_result in valid_results for output in valid_result]
     # Return a DataFrame with the valid outputs
     return pd.DataFrame(valid_outputs, columns=['name/description', 'category'])
     try:
         # Create a pattern to match a list Description-Category pairs (List[Tuple[str, str]])
         pattern = r"\['([^']+)', '([^']+)'\]"
         # Use it to extract all the correctly formatted pairs from the raw result
         matches = re.findall(pattern, raw_result.replace("\\'", "'"))
                 parsed_pair = ast.literal_eval(str(list(match)))
                 valid_outputs.append(parsed_pair)
             except Exception as e:
+                logger.log(logging.ERROR,
+                           f"Parsing Error: {e}\nMatch: {match}\n")
                 result['valid'] = False
         result['output'] = valid_outputs
     except Exception as e:
+        logging.log(
+            logging.ERROR, f"| File: {file_name} | Unexpected Error: {e}\nRaw Result: {raw_result}")
         result['valid'] = False
+    return result