# Report configuration.
# NOTE(review): the nesting below was reconstructed from key names because the
# original layout was lost; confirm it against the consuming settings schema.

# Title of the document
title: "Pandas Profiling Report"

# Metadata
dataset:
  description: ""
  creator: ""
  author: ""
  copyright_holder: ""
  copyright_year: ""
  url: ""

variables:
  descriptions: {}

# infer dtypes
infer_dtypes: false

# Show the description at each variable (in addition to the overview tab)
show_variable_description: true

# Number of workers (0=multiprocessing.cpu_count())
pool_size: 0

# Show the progress bar
progress_bar: true

# Per variable type description settings
vars:
  num:
    quantiles:
      - 0.05
      - 0.25
      - 0.5
      - 0.75
      - 0.95
    skewness_threshold: 20
    low_categorical_threshold: 5
    # Set to zero to disable
    chi_squared_threshold: 0.0
  cat:
    length: false
    characters: false
    words: false
    cardinality_threshold: 50
    n_obs: 5
    # Set to zero to disable
    chi_squared_threshold: 0.0
    coerce_str_to_date: false
    redact: false
    histogram_largest: 10
    stop_words: []
  bool:
    n_obs: 3
    # string to boolean mapping dict
    # Boolean-looking keys are quoted so that YAML 1.1 loaders keep them as
    # strings; unquoted, yes/true and no/false resolve to the booleans
    # true/false and collapse into duplicate keys.
    mappings:
      t: true
      f: false
      "yes": true
      "no": false
      "y": true
      "n": false
      "true": true
      "false": false
  path:
    active: false
  file:
    active: false
  image:
    active: false
    exif: false
    hash: false
  url:
    active: false
  timeseries:
    active: false
    autocorrelation: 0.7
    lags: [1, 7, 12, 24, 30]
    significance: 0.05
    pacf_acf_lag: 100

# Sort the variables.
# Possible values: "ascending", "descending" or null (leaves original sorting)
sort: null

# which diagrams to show
missing_diagrams:
  bar: false
  matrix: false
  heatmap: false

correlations:
  pearson:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9
  spearman:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  kendall:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  phi_k:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  cramers:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9
  auto:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9

# Bivariate / Pairwise relations
interactions:
  targets: []
  continuous: false

# For categorical
categorical_maximum_correlation_distinct: 100

report:
  precision: 10

# Plot-specific settings
plot:
  # Image format (svg or png)
  image_format: "svg"
  dpi: 800

  scatter_threshold: 1000

  correlation:
    cmap: 'RdBu'
    bad: '#000000'

  missing:
    cmap: 'RdBu'
    # Force labels when there are > 50 variables
    force_labels: true

  cat_frequency:
    show: true  # if false, the category frequency plot is turned off
    type: 'bar'  # options: 'bar', 'pie'
    max_unique: 0
    colors: null  # use null for default or give a list of matplotlib recognised strings

  histogram:
    x_axis_labels: true

    # Number of bins (set to 0 to automatically detect the bin size)
    bins: 50

    # Maximum number of bins (when bins=0)
    max_bins: 250

# The number of observations to show
n_obs_unique: 5
n_extreme_obs: 5
n_freq_table_max: 10

# Use `deep` flag for memory_usage
memory_deep: false

# Configuration related to the duplicates
duplicates:
  head: 0
  # Quoted because the value starts with '#', which would otherwise begin a comment
  key: "# duplicates"

# Configuration related to the samples area
samples:
  head: 0
  tail: 0
  random: 0

# Configuration related to the rejection of variables
reject_variables: true

# When in a Jupyter notebook
notebook:
  iframe:
    height: '800px'
    width: '100%'
    # or 'src'
    attribute: 'srcdoc'

html:
  # Minify the html
  minify_html: true

  # Offline support
  use_local_assets: true

  # If true, single file, else directory with assets
  inline: true

  # Show navbar
  navbar_show: true

  # Assets prefix if inline = true
  assets_prefix: null

  # Styling options for the HTML report
  style:
    theme: null
    logo: ""
    primary_colors:
      - "#ff4b4b"
      - "#ff4b4b"
      - "#ff4b4b"

  # NOTE(review): assumed to be an html-level option rather than part of
  # `style` - confirm against the settings schema.
  full_width: false