# File: automl_pycaret / config_default.yaml
# Page header captured together with the file (not configuration data):
#   konstantinG's picture | Upload 12 files | dee4d19 | raw | history blame | 4.48 kB
# Title of the document (a plain string).
title: "Pandas Profiling Report"
# Metadata about the dataset.
# All fields are nested under `dataset`; each defaults to an empty string.
dataset:
  description: ""
  creator: ""
  author: ""
  copyright_holder: ""
  copyright_year: ""
  url: ""
# Per-variable settings; `descriptions` defaults to an empty mapping.
# NOTE(review): presumably maps a variable name to its description text — confirm.
variables:
  descriptions: {}
# Whether to infer dtypes.
infer_dtypes: true
# Show the description at each variable (in addition to the overview tab).
show_variable_description: true
# Number of workers; 0 means multiprocessing.cpu_count().
pool_size: 0
# Whether to show the progress bar.
progress_bar: true
# Per variable type description settings.
# Each direct child of `vars` holds the settings for one variable type.
vars:
  num:
    quantiles:
      - 0.05
      - 0.25
      - 0.5
      - 0.75
      - 0.95
    skewness_threshold: 20
    low_categorical_threshold: 5
    # Set to zero to disable
    chi_squared_threshold: 0.999
  cat:
    length: true
    characters: true
    words: true
    cardinality_threshold: 50
    n_obs: 5
    # Set to zero to disable
    chi_squared_threshold: 0.999
    coerce_str_to_date: false
    redact: false
    histogram_largest: 50
    stop_words: []
  bool:
    n_obs: 3
    # String to boolean mapping dict.
    # Keys are quoted so they stay strings: YAML 1.1 loaders resolve bare
    # yes/no/true/false to booleans, which would turn these entries into
    # duplicate boolean keys instead of distinct string keys.
    mappings:
      "t": true
      "f": false
      "yes": true
      "no": false
      "y": true
      "n": false
      "true": true
      "false": false
  file:
    active: false
  image:
    active: false
    exif: true
    hash: true
  path:
    active: false
  url:
    active: false
  timeseries:
    active: false
    autocorrelation: 0.7
    lags: [1, 7, 12, 24, 30]
    significance: 0.05
    pacf_acf_lag: 100
# Sort the variables.
# Possible values: "ascending", "descending" or null (null leaves the original sorting).
sort: null
# Which missing-value diagrams to show; one boolean switch per diagram.
missing_diagrams:
  bar: true
  matrix: true
  heatmap: true
# Correlation settings, one entry per correlation method.
# Every method carries the same three keys: `calculate`,
# `warn_high_correlations` and `threshold`.
correlations:
  pearson:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9
  spearman:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  kendall:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  phi_k:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  cramers:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9
  # `auto` is the only method with `calculate` enabled in these defaults.
  auto:
    calculate: true
    warn_high_correlations: true
    threshold: 0.9
# Bivariate / pairwise relations.
# `targets` defaults to an empty list.
interactions:
  targets: []
  continuous: true
# For categorical variables.
# NOTE(review): presumably the maximum number of distinct values a categorical
# variable may have to be included in correlation computations — confirm.
categorical_maximum_correlation_distinct: 100
# Report-level settings.
# NOTE(review): `precision` presumably controls numeric precision in the
# rendered report — confirm.
report:
  precision: 10
# Plot-specific settings.
# General options come first, followed by one sub-mapping per plot kind.
plot:
  # Image format (svg or png)
  image_format: "svg"
  dpi: 800
  scatter_threshold: 1000
  correlation:
    cmap: 'RdBu'
    bad: '#000000'
  missing:
    cmap: 'RdBu'
    # Force labels when there are > 50 variables
    # https://github.com/ResidentMario/missingno/issues/93#issuecomment-513322615
    force_labels: true
  cat_frequency:
    show: true  # if false, the category frequency plot is turned off
    type: 'bar'  # options: 'bar', 'pie'
    max_unique: 10
    colors: null  # use null for default or give a list of matplotlib recognised strings
  histogram:
    x_axis_labels: true
    # Number of bins (set to 0 to automatically detect the bin size)
    bins: 50
    # Maximum number of bins (when bins=0)
    max_bins: 250
# The number of observations to show.
# NOTE(review): the comment sits above n_obs_unique only; presumably it also
# covers n_extreme_obs and n_freq_table_max — confirm.
n_obs_unique: 5
n_extreme_obs: 5
n_freq_table_max: 10
# Use the `deep` flag for memory_usage.
# NOTE(review): presumably pandas' memory_usage(deep=...) — confirm.
memory_deep: false
# Configuration related to the duplicates.
# The value of `key` starts with '#', so it must remain quoted; unquoted it
# would be read as a comment.
duplicates:
  head: 10
  key: "# duplicates"
# Configuration related to the samples area.
# NOTE(review): presumably row counts for the head, tail and random samples —
# confirm.
samples:
  head: 10
  tail: 10
  random: 0
# Configuration related to the rejection of variables (boolean switch).
reject_variables: true
# Settings that apply when in a Jupyter notebook.
notebook:
  iframe:
    height: '800px'
    width: '100%'
    # or 'src'
    attribute: 'srcdoc'
# Settings for the HTML report output.
html:
  # Minify the html
  minify_html: true
  # Offline support
  use_local_assets: true
  # If true, single file, else directory with assets
  inline: true
  # Show navbar
  navbar_show: true
  # Assets prefix if inline = true
  assets_prefix: null
  # Styling options for the HTML report
  style:
    theme: null
    logo: ""
    # Colour values start with '#', so they must remain quoted; unquoted they
    # would be read as comments. All three entries are the same colour here.
    primary_colors:
      - "#ff4b4b"
      - "#ff4b4b"
      - "#ff4b4b"
    full_width: false