Spaces:
Running
Running
File size: 1,929 Bytes
682cd17 a5e3343 89cf40b a5e3343 682cd17 a5e3343 682cd17 a5e3343 89cf40b a5e3343 682cd17 89cf40b 682cd17 a5e3343 682cd17 a5e3343 682cd17 89cf40b 682cd17 89cf40b 682cd17 89cf40b 682cd17 89cf40b 682cd17 89cf40b a5e3343 89cf40b a5e3343 89cf40b 682cd17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# pipeline_v2.py
import os
import re
import pandas as pd
try:
import yfinance as yf
except Exception as e:
raise ImportError(
"yfinance is not installed. Add `yfinance>=0.2.40` to requirements.txt."
) from e
def _ensure_dir(path: str) -> None:
os.makedirs(path, exist_ok=True)
def _ticker_for_query(t: str) -> str:
"""
Prepare ticker for yfinance:
- strip spaces
- uppercase
- DO NOT alter '.' or '-' (yfinance relies on them, e.g. NESN.SW, BRK-B)
"""
return t.strip().upper()
def _ticker_for_filename(t: str) -> str:
"""
Prepare a safe filename:
- replace any char not [A-Za-z0-9] with '_'
"""
return re.sub(r"[^A-Za-z0-9]", "_", t)
def update_ticker_csv(
    ticker: str,
    start: str = "2015-01-01",
    interval: str = "1d",
    dst_dir: str = "/mnt/data"
) -> str:
    """
    Download OHLCV for `ticker` using yfinance and save as CSV.

    Args:
        ticker: Symbol to download, including any exchange suffix
            (e.g. NESN.SW, BMW.DE, VOD.L). It is stripped and upper-cased
            before the query.
        start: Passed through to `yf.download` as `start`.
        interval: Passed through to `yf.download` as `interval`; it is also
            part of the output file name.
        dst_dir: Directory that receives the CSV; created when missing.

    Returns:
        The CSV file path: `<dst_dir>/<sanitized ticker>_<interval>.csv`.

    Raises:
        ValueError: if `ticker` is blank, or if the download returns no rows.
    """
    tkr_query = _ticker_for_query(ticker)
    # Reject a blank symbol before touching the file system or the network.
    if not tkr_query:
        raise ValueError(
            "Ticker must be a non-empty symbol (e.g., NESN.SW, BMW.DE, VOD.L)."
        )
    tkr_file = _ticker_for_filename(tkr_query)

    _ensure_dir(dst_dir)

    df = yf.download(
        tkr_query,
        start=start,
        interval=interval,
        auto_adjust=False,
        progress=False,
        threads=True,
    )
    if df is None or df.empty:
        raise ValueError(
            f"No data returned for ticker '{tkr_query}' (start={start}, interval={interval}). "
            "Check the symbol and exchange suffix (e.g., NESN.SW, BMW.DE, VOD.L)."
        )

    # Newer yfinance releases may return two-level (field, ticker) columns
    # even for a single symbol, which would make to_csv emit a multi-row
    # header. Keep only the field level, and only when that loses nothing
    # (unique field names, i.e. a single symbol) - NOTE(review): confirm
    # against the installed yfinance version.
    if isinstance(df.columns, pd.MultiIndex):
        fields = df.columns.get_level_values(0)
        if fields.is_unique:
            df.columns = list(fields)

    # Ensure a clean Date index
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.reset_index()
        if "Date" in df.columns:
            df = df.set_index("Date")
        else:
            # No "Date" column: treat the first column (the former index)
            # as the date column.
            df.columns = ["Date"] + list(df.columns[1:])
            df = df.set_index("Date")
    # Always label the index "Date" so the CSV's first header cell is stable.
    df.index.name = "Date"

    csv_path = os.path.join(dst_dir, f"{tkr_file}_{interval}.csv")
    df.to_csv(csv_path)
    return csv_path
|