# Document title
title: 'Pandas Profiling Report'

# Dataset metadata; every field is an empty string in this file
dataset:
  description: ''
  creator: ''
  author: ''
  copyright_holder: ''
  copyright_year: ''
  url: ''

# Per-variable settings
variables:
  # Empty mapping here; presumably maps a variable name to its description
  # text — TODO confirm against the consumer of this file
  descriptions: {}

# Infer dtypes (switched off in this file)
infer_dtypes: false

# Show the description at each variable (in addition to the overview tab)
show_variable_description: true

# Number of workers; 0 means multiprocessing.cpu_count()
pool_size: 0

# Show the progress bar
progress_bar: true

# Per variable type description settings.
# Every type section that has an `active` flag is switched off in this file.
vars:
  num:
    quantiles:
      - 0.05
      - 0.25
      - 0.5
      - 0.75
      - 0.95
    skewness_threshold: 20
    low_categorical_threshold: 5
    # Set to zero to disable
    chi_squared_threshold: 0.0

  cat:
    length: false
    characters: false
    words: false
    cardinality_threshold: 50
    n_obs: 5
    # Set to zero to disable
    chi_squared_threshold: 0.0
    coerce_str_to_date: false
    redact: false
    histogram_largest: 10
    stop_words: []

  bool:
    n_obs: 3
    # String to boolean mapping dict.
    # The keys are quoted on purpose: unquoted `yes`, `no`, `true` and `false`
    # are resolved to booleans by YAML 1.1 loaders (e.g. PyYAML), which would
    # turn them into non-string keys and collapse `yes`/`true` and
    # `no`/`false` into duplicate keys.
    mappings:
      't': true
      'f': false
      'yes': true
      'no': false
      'y': true
      'n': false
      'true': true
      'false': false

  path:
    active: false

  file:
    active: false

  image:
    active: false
    exif: false
    hash: false

  url:
    active: false

  timeseries:
    active: false
    autocorrelation: 0.7
    lags: [1, 7, 12, 24, 30]
    significance: 0.05
    pacf_acf_lag: 100

# Variable ordering: "ascending", "descending", or null (keeps the original
# sorting)
sort: null

# Which diagrams to show (all switched off in this file)
missing_diagrams:
  bar: false
  matrix: false
  heatmap: false

# Correlation settings, one entry per method. Each entry carries the same
# three keys; `calculate` is false for every method in this file.
correlations:
  pearson:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9

  spearman:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9

  kendall:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9

  phi_k:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9

  cramers:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9

  auto:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9


# Bivariate / Pairwise relations
interactions:
  # Empty list in this file
  targets: []
  continuous: false

# Setting for categorical variables; judging by the key name this limits the
# number of distinct values considered for correlations — TODO confirm
categorical_maximum_correlation_distinct: 100

# Report-level settings
report:
  precision: 10

# Plot-specific settings
plot:
  # Image format (svg or png)
  image_format: 'svg'
  dpi: 800

  scatter_threshold: 1000

  correlation:
    cmap: 'RdBu'
    bad: '#000000'

  missing:
    cmap: 'RdBu'
    # Force labels when there are > 50 variables
    force_labels: true

  cat_frequency:
    # If false, the category frequency plot is turned off
    show: true
    # Options: 'bar', 'pie'
    type: 'bar'
    max_unique: 0
    # Use null for default or give a list of matplotlib recognised strings
    colors: null

  histogram:
    x_axis_labels: true

    # Number of bins (set to 0 to automatically detect the bin size)
    bins: 50

    # Maximum number of bins (when bins=0)
    max_bins: 250

# The number of observations to show (going by the key names: unique values,
# extreme values, and the maximum number of frequency-table rows)
n_obs_unique: 5
n_extreme_obs: 5
n_freq_table_max: 10

# Use `deep` flag for memory_usage
# (presumably pandas' `memory_usage(deep=...)` — TODO confirm)
memory_deep: false

# Configuration related to the duplicates
duplicates:
  head: 0
  # Quoted because the value starts with `#`
  key: '# duplicates'

# Configuration related to the samples area (all counts are 0 in this file)
samples:
  head: 0
  tail: 0
  random: 0

# Configuration related to the rejection of variables
reject_variables: true

# Settings used when in a Jupyter notebook
notebook:
  iframe:
    height: '800px'
    width: '100%'
    # Alternative value: 'src'
    attribute: 'srcdoc'

# HTML output settings
html:
  # Minify the html
  minify_html: true

  # Offline support
  use_local_assets: true

  # If true, single file, else directory with assets
  inline: true

  # Show navbar
  navbar_show: true

  # Assets prefix if inline = true
  assets_prefix: null

  # Styling options for the HTML report
  style:
    theme: null
    logo: ''
    primary_colors:
      - '#ff4b4b'
      - '#ff4b4b'
      - '#ff4b4b'

  full_width: false