clean app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,17 @@
|
|
1 |
-
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from pathlib import Path
|
4 |
from typing import Tuple
|
5 |
import pandas as pd
|
@@ -15,64 +27,29 @@ USE_DOTENV = False
|
|
15 |
ROOT = Path(__file__).parent
|
16 |
|
17 |
JSON_PATH = ROOT / "json"
|
18 |
-
|
|
|
19 |
DOTENV_PATH = ROOT.parent.parent / "apis" / ".env"
|
|
|
20 |
# DUCKDB_PATH = ROOT / "db" / "sss_vectordb.duckdb"
|
21 |
|
22 |
from src import front_dataset_handler as fdh, app_utils as utils, semantic_search as ss, env_options
|
23 |
tokens = env_options.check_env(use_dotenv=USE_DOTENV, dotenv_path=DOTENV_PATH, env_tokens = ["HF_TOKEN"])
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
#### CONEXIÓN DUCKDB A HUGGING FACE HUB ####
|
30 |
print("Initializing DuckDB connection...")
|
31 |
con = duckdb.connect()
|
32 |
-
hf_token = tokens.get("HF_TOKEN")
|
33 |
-
##################################
|
34 |
-
masked_hf_token = hf_token[:4] + "*" * (len(hf_token) - 8) + hf_token[-4:]
|
35 |
-
print(f"Using Hugging Face token: {masked_hf_token}")
|
36 |
-
##################################
|
37 |
-
|
38 |
-
hf_token = tokens.get("HF_TOKEN")
|
39 |
-
masked_hf_token = hf_token[:4] + "*" * (len(hf_token) - 8) + hf_token[-4:]
|
40 |
-
'''
|
41 |
-
create_secret_query = f"""
|
42 |
-
INSTALL httpfs;
|
43 |
-
LOAD httpfs;
|
44 |
-
CREATE PERSISTENT SECRET hf_token (
|
45 |
-
TYPE huggingface,
|
46 |
-
TOKEN '{hf_token}'
|
47 |
-
);
|
48 |
-
"""
|
49 |
-
'''
|
50 |
-
# con.sql(create_secret_query)
|
51 |
-
# print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
|
52 |
-
dataset_name = "reddgr/swift-stock-screener"
|
53 |
-
# con.sql(query="INSTALL vss; LOAD vss;")
|
54 |
-
|
55 |
-
create_secret_query = f"""
|
56 |
-
INSTALL httpfs;
|
57 |
-
LOAD httpfs;
|
58 |
-
CREATE PERSISTENT SECRET hf_token (
|
59 |
-
TYPE huggingface,
|
60 |
-
TOKEN '{hf_token}'
|
61 |
-
);
|
62 |
-
"""
|
63 |
-
con.sql(create_secret_query)
|
64 |
-
print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf().iloc[0,-2])
|
65 |
-
print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf().iloc[0,-1])
|
66 |
-
print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
|
67 |
|
68 |
-
# FROM 'hf://datasets/reddgr/swift-stock-screener/data/train-00000-of-00001.parquet';
|
69 |
create_table_query = f"""
|
70 |
INSTALL vss;
|
71 |
LOAD vss;
|
72 |
SET hnsw_enable_experimental_persistence = true;
|
73 |
CREATE TABLE vector_table AS
|
74 |
SELECT *, embeddings::float[{emb_model.get_sentence_embedding_dimension()}] as embeddings_float
|
75 |
-
FROM '
|
76 |
"""
|
77 |
|
78 |
con.sql(create_table_query)
|
@@ -83,28 +60,19 @@ create_index_query = f"""
|
|
83 |
"""
|
84 |
con.sql(create_index_query)
|
85 |
|
86 |
-
# print(con.sql("SELECT * FROM duckdb_secrets()").fetchdf())
|
87 |
-
print(f"Created search index. {time.time() - start_time:.2f} seconds.")
|
88 |
-
########################################
|
89 |
-
|
90 |
# ESTADO GLOBAL
|
91 |
last_result_df: pd.DataFrame = pd.DataFrame()
|
92 |
-
|
93 |
-
######################
|
94 |
last_search_type: str = ""
|
95 |
last_search_query: str = ""
|
96 |
-
# last_filtros_values: Tuple = ()
|
97 |
last_column_filters: list[tuple[str, str]] = []
|
98 |
last_sort_col_label: str = ""
|
99 |
last_sort_dir: str = ""
|
100 |
-
#######################
|
101 |
|
102 |
# ---------------------------------------------------------------------------
|
103 |
# CONFIG --------------------------------------------------------------------
|
104 |
# ---------------------------------------------------------------------------
|
105 |
-
app_dataset = load_dataset(
|
106 |
|
107 |
-
# dh_app = fdh.FrontDatasetHandler(app_dataset=pd.read_pickle(DATASET_PATH))
|
108 |
dh_app = fdh.FrontDatasetHandler(app_dataset=app_dataset)
|
109 |
maestro = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='EQUITY'].copy()
|
110 |
maestro_etf = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='ETF'].copy()
|
|
|
1 |
+
'''
|
2 |
+
Swift Stock Screener (SSS)
|
3 |
+
Copyright 2025 David González Romero
|
4 |
+
|
5 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
6 |
+
you may not use this file except in compliance with the License.
|
7 |
+
You may obtain a copy of the License at
|
8 |
+
|
9 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
10 |
+
|
11 |
+
App URL: https://huggingface.co/spaces/reddgr/sss
|
12 |
+
'''
|
13 |
+
|
14 |
+
# cd C:\Users\david\Documents\git\miax-tfm-dgr; python app.py
|
15 |
from pathlib import Path
|
16 |
from typing import Tuple
|
17 |
import pandas as pd
|
|
|
27 |
ROOT = Path(__file__).parent
|
28 |
|
29 |
JSON_PATH = ROOT / "json"
|
30 |
+
DATASET_PATH = "reddgr/swift-stock-screener" # Hugging Face hub dataset name
|
31 |
+
EMB_MODEL_PATH = "FinLang/finance-embeddings-investopedia" # Hugging Face Hub embeddings model name
|
32 |
DOTENV_PATH = ROOT.parent.parent / "apis" / ".env"
|
33 |
+
PARQUET_PATH = ROOT / "parquet" / "app_dataset.parquet"
|
34 |
# DUCKDB_PATH = ROOT / "db" / "sss_vectordb.duckdb"
|
35 |
|
36 |
from src import front_dataset_handler as fdh, app_utils as utils, semantic_search as ss, env_options
|
37 |
tokens = env_options.check_env(use_dotenv=USE_DOTENV, dotenv_path=DOTENV_PATH, env_tokens = ["HF_TOKEN"])
|
38 |
+
|
39 |
+
emb_model = SentenceTransformer(EMB_MODEL_PATH, token = tokens.get("HF_TOKEN"))
|
40 |
+
|
41 |
+
|
42 |
+
#### CONEXIÓN DE DUCKDB CON EL DATASET PARA INDEXAR ####
|
|
|
43 |
print("Initializing DuckDB connection...")
|
44 |
con = duckdb.connect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
|
|
46 |
create_table_query = f"""
|
47 |
INSTALL vss;
|
48 |
LOAD vss;
|
49 |
SET hnsw_enable_experimental_persistence = true;
|
50 |
CREATE TABLE vector_table AS
|
51 |
SELECT *, embeddings::float[{emb_model.get_sentence_embedding_dimension()}] as embeddings_float
|
52 |
+
FROM '{PARQUET_PATH}';
|
53 |
"""
|
54 |
|
55 |
con.sql(create_table_query)
|
|
|
60 |
"""
|
61 |
con.sql(create_index_query)
|
62 |
|
|
|
|
|
|
|
|
|
63 |
# ESTADO GLOBAL
|
64 |
last_result_df: pd.DataFrame = pd.DataFrame()
|
|
|
|
|
65 |
last_search_type: str = ""
|
66 |
last_search_query: str = ""
|
|
|
67 |
last_column_filters: list[tuple[str, str]] = []
|
68 |
last_sort_col_label: str = ""
|
69 |
last_sort_dir: str = ""
|
|
|
70 |
|
71 |
# ---------------------------------------------------------------------------
|
72 |
# CONFIG --------------------------------------------------------------------
|
73 |
# ---------------------------------------------------------------------------
|
74 |
+
app_dataset = load_dataset(DATASET_PATH, split="train", token = tokens.get("HF_TOKEN")).to_pandas()
|
75 |
|
|
|
76 |
dh_app = fdh.FrontDatasetHandler(app_dataset=app_dataset)
|
77 |
maestro = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='EQUITY'].copy()
|
78 |
maestro_etf = dh_app.app_dataset[dh_app.app_dataset['quoteType']=='ETF'].copy()
|