reddgr committed on
Commit b51b4a2 · 1 Parent(s): 00ff256

clean app.py

Files changed (1)
  1. app.py +24 -56
app.py CHANGED
@@ -1,5 +1,17 @@
- import time
- start_time = time.time()
  from pathlib import Path
  from typing import Tuple
  import pandas as pd
@@ -15,64 +27,29 @@ USE_DOTENV = False
  ROOT = Path(__file__).parent

  JSON_PATH = ROOT / "json"
- # DATASET_PATH = ROOT / "pkl" / "app_dataset.pkl"
  DOTENV_PATH = ROOT.parent.parent / "apis" / ".env"
  # DUCKDB_PATH = ROOT / "db" / "sss_vectordb.duckdb"

  from src import front_dataset_handler as fdh, app_utils as utils, semantic_search as ss, env_options
  tokens = env_options.check_env(use_dotenv=USE_DOTENV, dotenv_path=DOTENV_PATH, env_tokens = ["HF_TOKEN"])
- print(f"Libraries loaded. {time.time() - start_time:.2f} seconds.")
- # Load the embeddings model and connect to DuckDB
- emb_model = SentenceTransformer("FinLang/finance-embeddings-investopedia", token = tokens.get("HF_TOKEN"))
- # con = duckdb.connect(DUCKDB_PATH)
- print(f"Model loaded. {time.time() - start_time:.2f} seconds.")
- #### DUCKDB CONNECTION TO HUGGING FACE HUB ####
  print("Initializing DuckDB connection...")
  con = duckdb.connect()
- hf_token = tokens.get("HF_TOKEN")
- ##################################
- masked_hf_token = hf_token[:4] + "*" * (len(hf_token) - 8) + hf_token[-4:]
- print(f"Using Hugging Face token: {masked_hf_token}")
- ##################################
-
- hf_token = tokens.get("HF_TOKEN")
- masked_hf_token = hf_token[:4] + "*" * (len(hf_token) - 8) + hf_token[-4:]
- '''
- create_secret_query = f"""
- INSTALL httpfs;
- LOAD httpfs;
- CREATE PERSISTENT SECRET hf_token (
- TYPE huggingface,
- TOKEN '{hf_token}'
- );
- """
- '''
- # con.sql(create_secret_query)
- # print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
- dataset_name = "reddgr/swift-stock-screener"
- # con.sql(query="INSTALL vss; LOAD vss;")
-
- create_secret_query = f"""
- INSTALL httpfs;
- LOAD httpfs;
- CREATE PERSISTENT SECRET hf_token (
- TYPE huggingface,
- TOKEN '{hf_token}'
- );
- """
- con.sql(create_secret_query)
- print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf().iloc[0,-2])
- print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf().iloc[0,-1])
- print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
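The removed block above registers a persistent huggingface-type secret with DuckDB's httpfs extension. A minimal standalone sketch of what that enables, assuming the secret and a valid HF_TOKEN are already in place: httpfs can read the dataset parquet straight from the Hub, using the path from the commented-out line just below (the LIMIT and the shape print are illustrative, not part of the commit).

import duckdb

con = duckdb.connect()
con.sql("INSTALL httpfs; LOAD httpfs;")
# Assumes the persistent secret 'hf_token' created above is already registered.
preview = con.sql(
    "SELECT * "
    "FROM 'hf://datasets/reddgr/swift-stock-screener/data/train-00000-of-00001.parquet' "
    "LIMIT 5"
).fetchdf()
print(preview.shape)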
- # FROM 'hf://datasets/reddgr/swift-stock-screener/data/train-00000-of-00001.parquet';
  create_table_query = f"""
  INSTALL vss;
  LOAD vss;
  SET hnsw_enable_experimental_persistence = true;
  CREATE TABLE vector_table AS
  SELECT *, embeddings::float[{emb_model.get_sentence_embedding_dimension()}] as embeddings_float
- FROM 'parquet/app_dataset.parquet';
  """

  con.sql(create_table_query)
@@ -83,28 +60,19 @@ create_index_query = f"""
  """
  con.sql(create_index_query)
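The body of create_index_query sits outside the diff hunks, so it is not shown here. For reference, DuckDB's vss extension builds an HNSW index over a fixed-size float array column roughly as in the sketch below; the index name is an assumption introduced for illustration, while vector_table and embeddings_float come from the table created above.

# Illustrative sketch only; not the commit's actual create_index_query.
example_index_query = """
CREATE INDEX hnsw_embeddings_idx
ON vector_table
USING HNSW (embeddings_float);
"""
# A metric can optionally be set, e.g. WITH (metric = 'cosine'); the default is l2sq.
con.sql(example_index_query)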
- # print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
- print(f"Created search index. {time.time() - start_time:.2f} seconds.")
- ########################################
-
  # GLOBAL STATE
  last_result_df: pd.DataFrame = pd.DataFrame()
-
- ######################
  last_search_type: str = ""
  last_search_query: str = ""
- # last_filtros_values: Tuple = ()
  last_column_filters: list[tuple[str, str]] = []
  last_sort_col_label: str = ""
  last_sort_dir: str = ""
- #######################

  # ---------------------------------------------------------------------------
  # CONFIG --------------------------------------------------------------------
  # ---------------------------------------------------------------------------
- app_dataset = load_dataset("reddgr/swift-stock-screener", split="train", token = tokens.get("HF_TOKEN")).to_pandas()

- # dh_app = fdh.FrontDatasetHandler(app_dataset=pd.read_pickle(DATASET_PATH))
  dh_app = fdh.FrontDatasetHandler(app_dataset=app_dataset)
  maestro = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='EQUITY'].copy()
  maestro_etf = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='ETF'].copy()
 
+ '''
+ Swift Stock Screener (SSS)
+ Copyright 2025 David González Romero
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ App URL: https://huggingface.co/spaces/reddgr/sss
+ '''
+
+ # cd C:\Users\david\Documents\git\miax-tfm-dgr; python app.py
  from pathlib import Path
  from typing import Tuple
  import pandas as pd
 
  ROOT = Path(__file__).parent

  JSON_PATH = ROOT / "json"
+ DATASET_PATH = "reddgr/swift-stock-screener" # Hugging Face Hub dataset name
+ EMB_MODEL_PATH = "FinLang/finance-embeddings-investopedia" # Hugging Face Hub embeddings model name
  DOTENV_PATH = ROOT.parent.parent / "apis" / ".env"
+ PARQUET_PATH = ROOT / "parquet" / "app_dataset.parquet"
  # DUCKDB_PATH = ROOT / "db" / "sss_vectordb.duckdb"

  from src import front_dataset_handler as fdh, app_utils as utils, semantic_search as ss, env_options
  tokens = env_options.check_env(use_dotenv=USE_DOTENV, dotenv_path=DOTENV_PATH, env_tokens = ["HF_TOKEN"])
+
+ emb_model = SentenceTransformer(EMB_MODEL_PATH, token = tokens.get("HF_TOKEN"))
+
+
+ #### DUCKDB CONNECTION TO THE DATASET FOR INDEXING ####

  print("Initializing DuckDB connection...")
  con = duckdb.connect()
 
 
  create_table_query = f"""
  INSTALL vss;
  LOAD vss;
  SET hnsw_enable_experimental_persistence = true;
  CREATE TABLE vector_table AS
  SELECT *, embeddings::float[{emb_model.get_sentence_embedding_dimension()}] as embeddings_float
+ FROM '{PARQUET_PATH}';
  """

  con.sql(create_table_query)
 
  """
  con.sql(create_index_query)
 
  # GLOBAL STATE
  last_result_df: pd.DataFrame = pd.DataFrame()
  last_search_type: str = ""
  last_search_query: str = ""
  last_column_filters: list[tuple[str, str]] = []
  last_sort_col_label: str = ""
  last_sort_dir: str = ""
 
  # ---------------------------------------------------------------------------
  # CONFIG --------------------------------------------------------------------
  # ---------------------------------------------------------------------------
+ app_dataset = load_dataset(DATASET_PATH, split="train", token = tokens.get("HF_TOKEN")).to_pandas()

  dh_app = fdh.FrontDatasetHandler(app_dataset=app_dataset)
  maestro = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='EQUITY'].copy()
  maestro_etf = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='ETF'].copy()
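For context on how the pieces above fit together: the app's actual search logic lives in src.semantic_search (imported as ss), which is not part of this diff. A self-contained sketch of a nearest-neighbour lookup against the indexed table could look like the following; the query text, the LIMIT and the use of plain L2 distance are illustrative assumptions, while vector_table, embeddings_float, emb_model and con come from the code above, and array_distance is the vss distance function for fixed-size float arrays.

# Hypothetical usage sketch, not taken from the repository.
dim = emb_model.get_sentence_embedding_dimension()
query_vec = emb_model.encode("renewable energy utilities").tolist()  # free-text query -> embedding
hits = con.sql(
    f"""
    SELECT *
    FROM vector_table
    ORDER BY array_distance(embeddings_float, {query_vec}::FLOAT[{dim}])
    LIMIT 10
    """
).fetchdf()
print(hits.head())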